; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; 64-bit divides and rems should be split into a fast and slow path
; where the fast path uses a 32-bit operation.
;
; In every 64-bit function below the expected code ORs v1 with v3 (presumably
; the high dwords of %a and %b), compares the result with zero and splits exec:
; %bb.1 holds the full 64-bit expansion, %bb.3 holds the short 32-bit sequence
; that works on v0/v2 only and writes 0 to the high result register (v5).

; Signed 64-bit divide: expects the two-path form described above.
define i64 @sdiv64(i64 %a, i64 %b) {
; GFX9-LABEL: sdiv64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_or_b32_e32 v5, v1, v3
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB0_2
; GFX9-NEXT:  ; %bb.1:
; GFX9-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v9
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v9, vcc
; GFX9-NEXT:    v_xor_b32_e32 v10, v3, v9
; GFX9-NEXT:    v_xor_b32_e32 v11, v2, v9
; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, v11
; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v10
; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, 0, v11
; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, 0, v10, vcc
; GFX9-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
; GFX9-NEXT:    v_mov_b32_e32 v14, 0
; GFX9-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GFX9-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
; GFX9-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v2
; GFX9-NEXT:    v_cvt_u32_f32_e32 v12, v3
; GFX9-NEXT:    v_mul_lo_u32 v4, v8, v6
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
; GFX9-NEXT:    v_mul_lo_u32 v5, v7, v12
; GFX9-NEXT:    v_mul_hi_u32 v13, v6, v2
; GFX9-NEXT:    v_add3_u32 v5, v3, v5, v4
; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
; GFX9-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v3
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v2, 0
; GFX9-NEXT:    v_addc_co_u32_e32 v15, vcc, 0, v4, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v12, v5, 0
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v13, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v15, v3, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v5, v14, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v13, vcc, v6, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v12, vcc, v12, v3, vcc
; GFX9-NEXT:    v_mul_lo_u32 v4, v7, v12
; GFX9-NEXT:    v_mul_lo_u32 v5, v8, v13
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0
; GFX9-NEXT:    v_add3_u32 v5, v3, v4, v5
; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v5, 0
; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v5, 0
; GFX9-NEXT:    v_mul_hi_u32 v15, v13, v2
; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v12, v2, 0
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v15, v5
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v6, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v7
; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v5, v8, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v14, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v13, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v12, v3, vcc
; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v4
; GFX9-NEXT:    v_xor_b32_e32 v6, v0, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v4, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v3, 0
; GFX9-NEXT:    v_mul_hi_u32 v7, v6, v2
; GFX9-NEXT:    v_xor_b32_e32 v5, v5, v4
; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, 0, v1, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v2, 0
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v3, 0
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v7, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, v8, v1, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v14, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NEXT:    v_mul_lo_u32 v7, v10, v2
; GFX9-NEXT:    v_mul_lo_u32 v8, v11, v3
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0
; GFX9-NEXT:    v_add3_u32 v1, v1, v8, v7
; GFX9-NEXT:    v_sub_u32_e32 v7, v5, v1
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v6, v0
; GFX9-NEXT:    v_subb_co_u32_e64 v6, s[4:5], v7, v10, vcc
; GFX9-NEXT:    v_sub_co_u32_e64 v7, s[4:5], v0, v11
; GFX9-NEXT:    v_subbrev_co_u32_e64 v6, s[4:5], 0, v6, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v10
; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v11
; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v10
; GFX9-NEXT:    v_cndmask_b32_e64 v6, v8, v7, s[4:5]
; GFX9-NEXT:    v_add_co_u32_e64 v7, s[4:5], 2, v2
; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v5, v1, vcc
; GFX9-NEXT:    v_addc_co_u32_e64 v8, s[4:5], 0, v3, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v10
; GFX9-NEXT:    v_add_co_u32_e64 v12, s[4:5], 1, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v11
; GFX9-NEXT:    v_addc_co_u32_e64 v13, s[4:5], 0, v3, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v10
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v6
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT:    v_cndmask_b32_e64 v1, v12, v7, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v6, v13, v8, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT:    v_xor_b32_e32 v2, v4, v9
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v6, vcc
; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v2
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v1, v2
; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v0, v2, vcc
; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT:  .LBB0_2: ; %Flow
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB0_4
; GFX9-NEXT:  ; %bb.3:
; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v2
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, v1
; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT:    v_mul_lo_u32 v3, v1, v2
; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v3
; GFX9-NEXT:    v_sub_u32_e32 v3, v0, v2
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GFX9-NEXT:    v_add_u32_e32 v3, 1, v1
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v1, v3, vcc
; GFX9-NEXT:  .LBB0_4:
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_mov_b32_e32 v0, v4
; GFX9-NEXT:    v_mov_b32_e32 v1, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = sdiv i64 %a, %b
  ret i64 %d
}

; Unsigned 64-bit divide: same two-path form; %bb.1 has no sign handling
; (no v_ashrrev/v_xor), unlike the signed variant.
define i64 @udiv64(i64 %a, i64 %b) {
; GFX9-LABEL: udiv64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_or_b32_e32 v5, v1, v3
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB1_2
; GFX9-NEXT:  ; %bb.1:
; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, v2
; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, v3
; GFX9-NEXT:    v_sub_co_u32_e32 v10, vcc, 0, v2
; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX9-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
; GFX9-NEXT:    v_rcp_f32_e32 v4, v4
; GFX9-NEXT:    v_mov_b32_e32 v13, 0
; GFX9-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GFX9-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
; GFX9-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
; GFX9-NEXT:    v_cvt_u32_f32_e32 v8, v5
; GFX9-NEXT:    v_cvt_u32_f32_e32 v9, v4
; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v8
; GFX9-NEXT:    v_mul_lo_u32 v7, v11, v9
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0
; GFX9-NEXT:    v_add3_u32 v7, v5, v6, v7
; GFX9-NEXT:    v_mul_hi_u32 v12, v9, v4
; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v7, 0
; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v5
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v8, v4, 0
; GFX9-NEXT:    v_addc_co_u32_e32 v14, vcc, 0, v6, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v8, v7, 0
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v12, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v14, v5, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v7, v13, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v6
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v9, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v14, vcc, v8, v5, vcc
; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v14
; GFX9-NEXT:    v_mul_lo_u32 v7, v11, v12
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v12, 0
; GFX9-NEXT:    v_add3_u32 v7, v5, v6, v7
; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v14, v7, 0
; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v12, v7, 0
; GFX9-NEXT:    v_mul_hi_u32 v11, v12, v4
; GFX9-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v14, v4, 0
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v11, v7
; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v8, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v9
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v7, v10, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v6, v13, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v5
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v6, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v12, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v14, v5, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v0, v7, 0
; GFX9-NEXT:    v_mul_hi_u32 v8, v0, v6
; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v8, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, 0, v5, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v1, v6, 0
; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v1, v7, 0
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v8, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v9, v5, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v7, v13, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v4, v6
; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v5, vcc
; GFX9-NEXT:    v_mul_lo_u32 v8, v3, v6
; GFX9-NEXT:    v_mul_lo_u32 v9, v2, v7
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v6, 0
; GFX9-NEXT:    v_add3_u32 v5, v5, v9, v8
; GFX9-NEXT:    v_sub_u32_e32 v8, v1, v5
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX9-NEXT:    v_subb_co_u32_e64 v4, s[4:5], v8, v3, vcc
; GFX9-NEXT:    v_sub_co_u32_e64 v8, s[4:5], v0, v2
; GFX9-NEXT:    v_subbrev_co_u32_e64 v4, s[4:5], 0, v4, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v3
; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v3
; GFX9-NEXT:    v_cndmask_b32_e64 v4, v9, v8, s[4:5]
; GFX9-NEXT:    v_add_co_u32_e64 v8, s[4:5], 2, v6
; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v5, vcc
; GFX9-NEXT:    v_addc_co_u32_e64 v9, s[4:5], 0, v7, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GFX9-NEXT:    v_add_co_u32_e64 v10, s[4:5], 1, v6
; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e64 v11, s[4:5], 0, v7, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v4
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v4, v11, v9, s[4:5]
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT:    v_cndmask_b32_e64 v0, v10, v8, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v4, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v6, v0, vcc
; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT:  .LBB1_2: ; %Flow
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB1_4
; GFX9-NEXT:  ; %bb.3:
; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v2
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, v1
; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT:    v_mul_lo_u32 v3, v1, v2
; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v3
; GFX9-NEXT:    v_sub_u32_e32 v3, v0, v2
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GFX9-NEXT:    v_add_u32_e32 v3, 1, v1
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v1, v3, vcc
; GFX9-NEXT:  .LBB1_4:
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_mov_b32_e32 v0, v4
; GFX9-NEXT:    v_mov_b32_e32 v1, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = udiv i64 %a, %b
  ret i64 %d
}

; Signed 64-bit remainder: same two-path form; the saved exec mask lives in
; s[8:9] here because %bb.1 also uses s[6:7] as a scratch condition register.
define i64 @srem64(i64 %a, i64 %b) {
; GFX9-LABEL: srem64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_or_b32_e32 v5, v1, v3
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB2_2
; GFX9-NEXT:  ; %bb.1:
; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
; GFX9-NEXT:    v_xor_b32_e32 v9, v3, v4
; GFX9-NEXT:    v_xor_b32_e32 v10, v2, v4
; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, v10
; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v9
; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, 0, v10
; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, 0, v9, vcc
; GFX9-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
; GFX9-NEXT:    v_mov_b32_e32 v13, 0
; GFX9-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GFX9-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
; GFX9-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v2
; GFX9-NEXT:    v_cvt_u32_f32_e32 v11, v3
; GFX9-NEXT:    v_mul_lo_u32 v4, v8, v6
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
; GFX9-NEXT:    v_mul_lo_u32 v5, v7, v11
; GFX9-NEXT:    v_mul_hi_u32 v12, v6, v2
; GFX9-NEXT:    v_add3_u32 v5, v3, v5, v4
; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v3
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v11, v2, 0
; GFX9-NEXT:    v_addc_co_u32_e32 v14, vcc, 0, v4, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v5, 0
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v12, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v14, v3, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v5, v13, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v6, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v11, vcc, v11, v3, vcc
; GFX9-NEXT:    v_mul_lo_u32 v4, v7, v11
; GFX9-NEXT:    v_mul_lo_u32 v5, v8, v12
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v12, 0
; GFX9-NEXT:    v_add3_u32 v5, v3, v4, v5
; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v11, v5, 0
; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v12, v5, 0
; GFX9-NEXT:    v_mul_hi_u32 v14, v12, v2
; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v2, 0
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v14, v5
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v6, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v7
; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v5, v8, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v13, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v12, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v11, v3, vcc
; GFX9-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v5
; GFX9-NEXT:    v_xor_b32_e32 v6, v0, v5
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v3, 0
; GFX9-NEXT:    v_mul_hi_u32 v7, v6, v2
; GFX9-NEXT:    v_xor_b32_e32 v4, v4, v5
; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, 0, v1, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v7, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, v8, v1, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v13, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    v_mul_lo_u32 v2, v9, v0
; GFX9-NEXT:    v_mul_lo_u32 v3, v10, v1
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v10, v0, 0
; GFX9-NEXT:    v_add3_u32 v1, v1, v3, v2
; GFX9-NEXT:    v_sub_u32_e32 v2, v4, v1
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v6, v0
; GFX9-NEXT:    v_subb_co_u32_e64 v2, s[4:5], v2, v9, vcc
; GFX9-NEXT:    v_sub_co_u32_e64 v3, s[4:5], v0, v10
; GFX9-NEXT:    v_subbrev_co_u32_e64 v6, s[6:7], 0, v2, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v6, v9
; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[6:7]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v3, v10
; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], v6, v9
; GFX9-NEXT:    v_subb_co_u32_e64 v2, s[4:5], v2, v9, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[6:7]
; GFX9-NEXT:    v_sub_co_u32_e64 v8, s[4:5], v3, v10
; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v4, v1, vcc
; GFX9-NEXT:    v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v9
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v7
; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v10
; GFX9-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v9
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v2, v3, v8, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v5
; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v5
; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v5
; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v5, vcc
; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT:  .LBB2_2: ; %Flow
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[8:9]
; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB2_4
; GFX9-NEXT:  ; %bb.3:
; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v2
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, v1
; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT:    v_mul_lo_u32 v1, v1, v2
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v1
; GFX9-NEXT:    v_sub_u32_e32 v1, v0, v2
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT:    v_sub_u32_e32 v1, v0, v2
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v0, v1, vcc
; GFX9-NEXT:  .LBB2_4:
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_mov_b32_e32 v0, v4
; GFX9-NEXT:    v_mov_b32_e32 v1, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = srem i64 %a, %b
  ret i64 %d
}

; Unsigned 64-bit remainder: same two-path form, exec mask saved in s[8:9].
define i64 @urem64(i64 %a, i64 %b) {
; GFX9-LABEL: urem64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_or_b32_e32 v5, v1, v3
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB3_2
; GFX9-NEXT:  ; %bb.1:
; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, v2
; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, v3
; GFX9-NEXT:    v_sub_co_u32_e32 v10, vcc, 0, v2
; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX9-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
; GFX9-NEXT:    v_rcp_f32_e32 v4, v4
; GFX9-NEXT:    v_mov_b32_e32 v13, 0
; GFX9-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GFX9-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
; GFX9-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
; GFX9-NEXT:    v_cvt_u32_f32_e32 v8, v5
; GFX9-NEXT:    v_cvt_u32_f32_e32 v9, v4
; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v8
; GFX9-NEXT:    v_mul_lo_u32 v7, v11, v9
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0
; GFX9-NEXT:    v_add3_u32 v7, v5, v6, v7
; GFX9-NEXT:    v_mul_hi_u32 v12, v9, v4
; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v7, 0
; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v5
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v8, v4, 0
; GFX9-NEXT:    v_addc_co_u32_e32 v14, vcc, 0, v6, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v8, v7, 0
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v12, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v14, v5, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v7, v13, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v6
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v9, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v14, vcc, v8, v5, vcc
; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v14
; GFX9-NEXT:    v_mul_lo_u32 v7, v11, v12
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v12, 0
; GFX9-NEXT:    v_add3_u32 v7, v5, v6, v7
; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v14, v7, 0
; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v12, v7, 0
; GFX9-NEXT:    v_mul_hi_u32 v11, v12, v4
; GFX9-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v14, v4, 0
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v11, v7
; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v8, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v9
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v7, v10, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v6, v13, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v5
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v6, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v12, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v14, v5, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v0, v7, 0
; GFX9-NEXT:    v_mul_hi_u32 v8, v0, v6
; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v8, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, 0, v5, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v1, v6, 0
; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v1, v7, 0
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v8, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v9, v5, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v7, v13, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v6
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
; GFX9-NEXT:    v_mul_lo_u32 v6, v3, v4
; GFX9-NEXT:    v_mul_lo_u32 v7, v2, v5
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v4, 0
; GFX9-NEXT:    v_add3_u32 v5, v5, v7, v6
; GFX9-NEXT:    v_sub_u32_e32 v6, v1, v5
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX9-NEXT:    v_subb_co_u32_e64 v4, s[4:5], v6, v3, vcc
; GFX9-NEXT:    v_sub_co_u32_e64 v6, s[4:5], v0, v2
; GFX9-NEXT:    v_subbrev_co_u32_e64 v7, s[6:7], 0, v4, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v7, v3
; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v6, v2
; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v5, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], v7, v3
; GFX9-NEXT:    v_subb_co_u32_e64 v4, s[4:5], v4, v3, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GFX9-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
; GFX9-NEXT:    v_sub_co_u32_e64 v9, s[4:5], v6, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_subbrev_co_u32_e64 v4, s[4:5], 0, v4, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v8
; GFX9-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v4, v7, v4, s[4:5]
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v5, v1, v4, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v1, v6, v9, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v0, v1, vcc
; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT:  .LBB3_2: ; %Flow
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[8:9]
; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB3_4
; GFX9-NEXT:  ; %bb.3:
; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v2
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, v1
; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT:    v_mul_lo_u32 v1, v1, v2
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v1
; GFX9-NEXT:    v_sub_u32_e32 v1, v0, v2
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT:    v_sub_u32_e32 v1, v0, v2
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v0, v1, vcc
; GFX9-NEXT:  .LBB3_4:
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_mov_b32_e32 v0, v4
; GFX9-NEXT:    v_mov_b32_e32 v1, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = urem i64 %a, %b
  ret i64 %d
}

; Signed 32-bit divide: no bypass is expected; the checks contain a single
; straight-line expansion with no exec manipulation or branch.
define i32 @sdiv32(i32 %a, i32 %b) {
; GFX9-LABEL: sdiv32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v2
; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v1
; GFX9-NEXT:    v_sub_u32_e32 v4, 0, v1
; GFX9-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
; GFX9-NEXT:    v_add_u32_e32 v0, v0, v5
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v5
; GFX9-NEXT:    v_xor_b32_e32 v2, v5, v2
; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
; GFX9-NEXT:    v_mul_lo_u32 v4, v4, v3
; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
; GFX9-NEXT:    v_mul_hi_u32 v3, v0, v3
; GFX9-NEXT:    v_mul_lo_u32 v4, v3, v1
; GFX9-NEXT:    v_add_u32_e32 v5, 1, v3
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v4
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_sub_u32_e32 v4, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GFX9-NEXT:    v_add_u32_e32 v4, 1, v3
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = sdiv i32 %a, %b
  ret i32 %d
}

; Unsigned 32-bit divide: straight-line expansion, no branch expected.
define i32 @udiv32(i32 %a, i32 %b) {
; GFX9-LABEL: udiv32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, v1
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v1
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, v2
; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT:    v_add_u32_e32 v2, v2, v3
; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
; GFX9-NEXT:    v_mul_lo_u32 v3, v2, v1
; GFX9-NEXT:    v_add_u32_e32 v4, 1, v2
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v3
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_sub_u32_e32 v3, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    v_add_u32_e32 v3, 1, v2
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = udiv i32 %a, %b
  ret i32 %d
}

; Signed 32-bit remainder: straight-line expansion, no branch expected.
define i32 @srem32(i32 %a, i32 %b) {
; GFX9-LABEL: srem32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v2
; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, v1
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v1
; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; GFX9-NEXT:    v_add_u32_e32 v0, v0, v4
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v4
; GFX9-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, v2
; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT:    v_add_u32_e32 v2, v2, v3
; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
; GFX9-NEXT:    v_mul_lo_u32 v2, v2, v1
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v2
; GFX9-NEXT:    v_sub_u32_e32 v2, v0, v1
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT:    v_sub_u32_e32 v2, v0, v1
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v4
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = srem i32 %a, %b
  ret i32 %d
}

; Unsigned 32-bit remainder: straight-line expansion, no branch expected.
define i32 @urem32(i32 %a, i32 %b) {
; GFX9-LABEL: urem32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, v1
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v1
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, v2
; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT:    v_add_u32_e32 v2, v2, v3
; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
; GFX9-NEXT:    v_mul_lo_u32 v2, v2, v1
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v2
; GFX9-NEXT:    v_sub_u32_e32 v2, v0, v1
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT:    v_sub_u32_e32 v2, v0, v1
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = urem i32 %a, %b
  ret i32 %d
}

; Returns <2 x i64>; the checks open with the same v1|v3 zero test and exec
; split as the 64-bit functions above, with the saved mask in s[10:11].
define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
; GFX9-LABEL: sdivrem64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_or_b32_e32 v5, v1, v3
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX9-NEXT:    ; implicit-def: $vgpr6_vgpr7
; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB8_2
; GFX9-NEXT:  ; %bb.1:
; GFX9-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v9
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v9, vcc
; GFX9-NEXT:    v_xor_b32_e32 v10, v3, v9
; GFX9-NEXT:    v_xor_b32_e32 v11, v2, v9
; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, v11
; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v10
; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, 0, v11
; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, 0, v10, vcc
; GFX9-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
; GFX9-NEXT:    v_mov_b32_e32 v14, 0
; GFX9-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GFX9-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
; GFX9-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v2
; GFX9-NEXT:    v_cvt_u32_f32_e32 v12, v3
; GFX9-NEXT:    v_mul_lo_u32 v4, v8, v6
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
; GFX9-NEXT:    v_mul_lo_u32 v5, v7, v12
; GFX9-NEXT:    v_mul_hi_u32 v13, v6, v2
; GFX9-NEXT:    v_add3_u32 v5, v3, v5, v4
; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
; GFX9-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v3
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v2, 0
; GFX9-NEXT:    v_addc_co_u32_e32 v15, vcc, 0, v4, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v12, v5, 0
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v13, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v15, v3, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v5, v14, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v13, vcc, v6, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v12, vcc, v12, v3, vcc
; GFX9-NEXT:    v_mul_lo_u32 v4, v7, v12
; GFX9-NEXT:    v_mul_lo_u32 v5, v8, v13
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0
; GFX9-NEXT:    v_add3_u32 v5, v3, v4, v5
; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v5, 0
; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v5, 0
; GFX9-NEXT:    v_mul_hi_u32 v15, v13, v2
; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v12, v2, 0
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v15, v5
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v6, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v7
; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v5, v8, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v14, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v13, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v12, v3, vcc
; GFX9-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v7
; GFX9-NEXT:    v_xor_b32_e32 v5, v0, v7
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v1, v7, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v3, 0
; GFX9-NEXT:    v_mul_hi_u32 v6, v5, v2
; GFX9-NEXT:    v_xor_b32_e32 v4, v4, v7
; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, 0, v1, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v6, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, v8, v1, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v14, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v2
; GFX9-NEXT:    v_mul_lo_u32 v8, v11, v3
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0
; GFX9-NEXT:    v_add3_u32 v1, v1, v8, v6
; GFX9-NEXT:    v_sub_u32_e32 v6, v4, v1
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v5, v0
; GFX9-NEXT:    v_subb_co_u32_e64 v6, s[4:5], v6, v10, vcc
; GFX9-NEXT:    v_sub_co_u32_e64 v8, s[4:5], v0, v11
; GFX9-NEXT:    v_subbrev_co_u32_e64 v12, s[6:7], 0, v6, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v10
; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v8, v11
; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], v12, v10
; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
803; GFX9-NEXT: v_add_co_u32_e64 v13, s[6:7], 2, v2 804; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v3, s[6:7] 805; GFX9-NEXT: v_add_co_u32_e64 v15, s[6:7], 1, v2 806; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc 807; GFX9-NEXT: v_addc_co_u32_e64 v16, s[6:7], 0, v3, s[6:7] 808; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 809; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5 810; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 811; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v11 812; GFX9-NEXT: v_cndmask_b32_e64 v5, v16, v14, s[6:7] 813; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc 814; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v10 815; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc 816; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 817; GFX9-NEXT: v_cndmask_b32_e64 v4, v15, v13, s[6:7] 818; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 819; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 820; GFX9-NEXT: v_xor_b32_e32 v5, v7, v9 821; GFX9-NEXT: v_xor_b32_e32 v2, v2, v5 822; GFX9-NEXT: v_xor_b32_e32 v3, v3, v5 823; GFX9-NEXT: v_sub_co_u32_e64 v4, s[8:9], v2, v5 824; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v6, v10, s[4:5] 825; GFX9-NEXT: v_subb_co_u32_e64 v5, s[8:9], v3, v5, s[8:9] 826; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v8, v11 827; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5] 828; GFX9-NEXT: v_cndmask_b32_e64 v2, v12, v2, s[6:7] 829; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 830; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v3, s[6:7] 831; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 832; GFX9-NEXT: v_xor_b32_e32 v0, v0, v7 833; GFX9-NEXT: v_xor_b32_e32 v1, v1, v7 834; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, v0, v7 835; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v1, v7, vcc 836; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 837; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 838; GFX9-NEXT: .LBB8_2: ; %Flow 839; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[10:11] 840; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5] 841; GFX9-NEXT: s_cbranch_execz .LBB8_4 842; GFX9-NEXT: ; %bb.3: 843; GFX9-NEXT: v_cvt_f32_u32_e32 v1, 
v2 844; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 845; GFX9-NEXT: v_mov_b32_e32 v5, 0 846; GFX9-NEXT: v_mov_b32_e32 v7, v5 847; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 848; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 849; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 850; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 851; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 852; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 853; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 854; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 855; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 856; GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 857; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 858; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 859; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 860; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 861; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 862; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 863; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 864; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v3, vcc 865; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v4, vcc 866; GFX9-NEXT: .LBB8_4: 867; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 868; GFX9-NEXT: v_mov_b32_e32 v0, v4 869; GFX9-NEXT: v_mov_b32_e32 v1, v5 870; GFX9-NEXT: v_mov_b32_e32 v2, v6 871; GFX9-NEXT: v_mov_b32_e32 v3, v7 872; GFX9-NEXT: s_setpc_b64 s[30:31] 873 %d = sdiv i64 %a, %b 874 %r = srem i64 %a, %b 875 %ins.0 = insertelement <2 x i64> undef, i64 %d, i32 0 876 %ins.1 = insertelement <2 x i64> %ins.0, i64 %r, i32 1 877 ret <2 x i64> %ins.1 878} 879 880define <2 x i64> @udivrem64(i64 %a, i64 %b) { 881; GFX9-LABEL: udivrem64: 882; GFX9: ; %bb.0: 883; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 884; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 885; GFX9-NEXT: v_mov_b32_e32 v4, 0 886; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 887; GFX9-NEXT: ; implicit-def: $vgpr6_vgpr7 888; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 889; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 890; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 891; GFX9-NEXT: s_cbranch_execz .LBB9_2 892; GFX9-NEXT: ; %bb.1: 893; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2 894; GFX9-NEXT: v_cvt_f32_u32_e32 
v5, v3 895; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2 896; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc 897; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 898; GFX9-NEXT: v_rcp_f32_e32 v4, v4 899; GFX9-NEXT: v_mov_b32_e32 v13, 0 900; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 901; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 902; GFX9-NEXT: v_trunc_f32_e32 v5, v5 903; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 904; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5 905; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4 906; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8 907; GFX9-NEXT: v_mul_lo_u32 v7, v11, v9 908; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0 909; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 910; GFX9-NEXT: v_mul_hi_u32 v12, v9, v4 911; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v7, 0 912; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v5 913; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v8, v4, 0 914; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v6, vcc 915; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v7, 0 916; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v12, v4 917; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v14, v5, vcc 918; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v7, v13, vcc 919; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6 920; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc 921; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v9, v4 922; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, v8, v5, vcc 923; GFX9-NEXT: v_mul_lo_u32 v6, v10, v14 924; GFX9-NEXT: v_mul_lo_u32 v7, v11, v12 925; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v12, 0 926; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 927; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v14, v7, 0 928; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v7, 0 929; GFX9-NEXT: v_mul_hi_u32 v11, v12, v4 930; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v14, v4, 0 931; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v11, v7 932; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc 933; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 934; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v10, vcc 935; GFX9-NEXT: 
v_addc_co_u32_e32 v6, vcc, v6, v13, vcc 936; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v5 937; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 938; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v12, v4 939; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v14, v5, vcc 940; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v7, 0 941; GFX9-NEXT: v_mul_hi_u32 v8, v0, v6 942; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v4 943; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v5, vcc 944; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v6, 0 945; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v7, 0 946; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v8, v4 947; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v9, v5, vcc 948; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v7, v13, vcc 949; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v4, v6 950; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v5, vcc 951; GFX9-NEXT: v_mul_lo_u32 v8, v3, v6 952; GFX9-NEXT: v_mul_lo_u32 v9, v2, v7 953; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, 0 954; GFX9-NEXT: v_add3_u32 v5, v5, v9, v8 955; GFX9-NEXT: v_sub_u32_e32 v8, v1, v5 956; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4 957; GFX9-NEXT: v_subb_co_u32_e64 v8, s[4:5], v8, v3, vcc 958; GFX9-NEXT: v_sub_co_u32_e64 v9, s[4:5], v0, v2 959; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[6:7], 0, v8, s[4:5] 960; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v3 961; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[6:7] 962; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v2 963; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[6:7] 964; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v10, v3 965; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[6:7] 966; GFX9-NEXT: v_add_co_u32_e64 v11, s[6:7], 2, v6 967; GFX9-NEXT: v_addc_co_u32_e64 v12, s[6:7], 0, v7, s[6:7] 968; GFX9-NEXT: v_add_co_u32_e64 v13, s[6:7], 1, v6 969; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc 970; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v7, s[6:7] 971; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 972; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4 973; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 974; 
GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 975; GFX9-NEXT: v_cndmask_b32_e64 v4, v14, v12, s[6:7] 976; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 977; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 978; GFX9-NEXT: v_subb_co_u32_e64 v3, s[4:5], v8, v3, s[4:5] 979; GFX9-NEXT: v_sub_co_u32_e64 v2, s[4:5], v9, v2 980; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc 981; GFX9-NEXT: v_subbrev_co_u32_e64 v3, s[4:5], 0, v3, s[4:5] 982; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 983; GFX9-NEXT: v_cndmask_b32_e64 v3, v10, v3, s[6:7] 984; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v4, vcc 985; GFX9-NEXT: v_cndmask_b32_e64 v4, v13, v11, s[6:7] 986; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc 987; GFX9-NEXT: v_cndmask_b32_e64 v1, v9, v2, s[6:7] 988; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc 989; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc 990; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 991; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 992; GFX9-NEXT: .LBB9_2: ; %Flow 993; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[8:9] 994; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5] 995; GFX9-NEXT: s_cbranch_execz .LBB9_4 996; GFX9-NEXT: ; %bb.3: 997; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 998; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 999; GFX9-NEXT: v_mov_b32_e32 v5, 0 1000; GFX9-NEXT: v_mov_b32_e32 v7, v5 1001; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 1002; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 1003; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 1004; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 1005; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 1006; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 1007; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 1008; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 1009; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 1010; GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 1011; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 1012; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 1013; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1014; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1015; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 1016; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 1017; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, 
v0, v2 1018; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v3, vcc 1019; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v4, vcc 1020; GFX9-NEXT: .LBB9_4: 1021; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 1022; GFX9-NEXT: v_mov_b32_e32 v0, v4 1023; GFX9-NEXT: v_mov_b32_e32 v1, v5 1024; GFX9-NEXT: v_mov_b32_e32 v2, v6 1025; GFX9-NEXT: v_mov_b32_e32 v3, v7 1026; GFX9-NEXT: s_setpc_b64 s[30:31] 1027 %d = udiv i64 %a, %b 1028 %r = urem i64 %a, %b 1029 %ins.0 = insertelement <2 x i64> undef, i64 %d, i32 0 1030 %ins.1 = insertelement <2 x i64> %ins.0, i64 %r, i32 1 1031 ret <2 x i64> %ins.1 1032} 1033 1034define i64 @sdiv64_known32(i64 %a, i64 %b) { 1035; GFX9-LABEL: sdiv64_known32: 1036; GFX9: ; %bb.0: 1037; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1038; GFX9-NEXT: v_cvt_f32_u32_e32 v0, v3 1039; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v1 1040; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v0 1041; GFX9-NEXT: v_mul_f32_e32 v2, v1, v2 1042; GFX9-NEXT: v_trunc_f32_e32 v2, v2 1043; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v2 1044; GFX9-NEXT: v_mad_f32 v1, -v2, v0, v1 1045; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 1046; GFX9-NEXT: v_mov_b32_e32 v1, 0 1047; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v3, vcc 1048; GFX9-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 1049; GFX9-NEXT: s_setpc_b64 s[30:31] 1050 %a.ext = ashr i64 %a, 32 1051 %b.ext = ashr i64 %b, 32 1052 %d = udiv i64 %a.ext, %b.ext 1053 ret i64 %d 1054} 1055 1056define i64 @udiv64_known32(i64 %a, i64 %b) { 1057; GFX9-LABEL: udiv64_known32: 1058; GFX9: ; %bb.0: 1059; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1060; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 1061; GFX9-NEXT: v_cvt_f32_u32_e32 v0, v0 1062; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v1 1063; GFX9-NEXT: v_mul_f32_e32 v2, v0, v2 1064; GFX9-NEXT: v_trunc_f32_e32 v2, v2 1065; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v2 1066; GFX9-NEXT: v_mad_f32 v0, -v2, v1, v0 1067; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 1068; GFX9-NEXT: v_mov_b32_e32 v1, 0 1069; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v3, vcc 1070; GFX9-NEXT: 
s_setpc_b64 s[30:31] 1071 %a.mask = and i64 %a, 4294967295 1072 %b.mask = and i64 %b, 4294967295 1073 %d = udiv i64 %a.mask, %b.mask 1074 ret i64 %d 1075} 1076