; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN-ISEL %s

; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CISI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX1010,GFX10W32 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize32 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX1030,GFX10W32 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX1030,GFX10W64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11 %s

; This file checks how 64-bit add/sub and unsigned add/sub-with-overflow are
; selected. The first run line stops after instruction selection and inspects
; the MIR; the remaining run lines inspect the final assembly for several
; subtargets and wavefront sizes.

; 64-bit add of two kernel arguments. The checks expect S_ADD_U64_PSEUDO after
; isel and an s_add_u32 / s_addc_u32 pair in the final assembly.

; GCN-ISEL-LABEL: name: sadd64rr
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: S_ADD_U64_PSEUDO

; GCN-LABEL: @sadd64rr
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @sadd64rr(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %add = add i64 %a, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; 64-bit add of a kernel argument and the constant 20015998343286
; (0x1234_56789876). The checks expect the low and high halves of the constant
; to appear as literals on the s_add_u32 / s_addc_u32 pair.

; GCN-ISEL-LABEL: name: sadd64ri
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: S_ADD_U64_PSEUDO

; GCN-LABEL: @sadd64ri
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x56789876
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1234
define amdgpu_kernel void @sadd64ri(i64 addrspace(1)* %out, i64 %a) {
entry:
  %add = add i64 20015998343286, %a
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; 64-bit add of a kernel argument and the sign-extended workitem id. The checks
; expect V_ADD_U64_PSEUDO after isel and a per-subtarget vector add / add-with-
; carry pair in the final assembly.

; GCN-ISEL-LABEL: name: vadd64rr
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: V_ADD_U64_PSEUDO

; GCN-LABEL: @vadd64rr
;
; CISI: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; CISI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX10W32: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
; GFX10W64: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v{{[0-9]+}}
; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
; GFX1030: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
;
; GFX11: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
; GFX11: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
define amdgpu_kernel void @vadd64rr(i64 addrspace(1)* %out, i64 %a) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %add = add i64 %a, %tid.ext
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; 64-bit add of the constant 20015998343286 and the sign-extended workitem id.
; The checks expect V_ADD_U64_PSEUDO after isel and the constant halves
; 0x56789876 / 0x1234 in the final vector add sequence.

; GCN-ISEL-LABEL: name: vadd64ri
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: V_ADD_U64_PSEUDO

; GCN-LABEL: @vadd64ri
;
; CISI: v_add_i32_e32 v0, vcc, 0x56789876, v0
; CISI: v_mov_b32_e32 v1, 0x1234
; CISI: v_addc_u32_e32 v1, vcc, 0, v1, vcc
;
; VI: v_add_u32_e32 v0, vcc, 0x56789876, v0
; VI: v_mov_b32_e32 v1, 0x1234
; VI: v_addc_u32_e32 v1, vcc, 0, v1, vcc
;
; GFX9: v_add_co_u32_e32 v0, vcc, 0x56789876, v0
; GFX9: v_mov_b32_e32 v1, 0x1234
; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
;
; GFX10W32: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
; GFX10W64: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], 0x56789876, v{{[0-9]+}}
; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0, 0x1234, [[CARRY]]
; GFX1030: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, 0, 0x1234, [[CARRY]]
;
; GFX11: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
; GFX11: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, 0, 0x1234, [[CARRY]]
define amdgpu_kernel void @vadd64ri(i64 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %add = add i64 20015998343286, %tid.ext
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; 32-bit uadd.with.overflow on kernel arguments where the overflow bit is
; extracted but never stored. Only the isel output is checked; it is expected
; to contain S_ADD_I32.

; GCN-ISEL-LABEL: name: suaddo32
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_ADD_I32
define amdgpu_kernel void @suaddo32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue { i32, i1 } %uadd, 0
  %carry = extractvalue { i32, i1 } %uadd, 1
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}


; Same as @suaddo32, except that the overflow bit is also stored to %carryout.

; GCN-ISEL-LABEL: name: uaddo32_vcc_user
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: V_ADD_CO_U32_e64

; Below we check selection to v_add/addc,
; because the only user of VCC produced by the UADDO is v_cndmask.
; We select to VALU form to avoid unnecessary s_cselect to copy SCC to VCC

; GCN-LABEL: @uaddo32_vcc_user
;
; CISI: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; CISI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; GFX10W32: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX10W64: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
;
; GFX11: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX11: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
define amdgpu_kernel void @uaddo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue { i32, i1 } %uadd, 0
  %carry = extractvalue { i32, i1 } %uadd, 1
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

; 64-bit uadd.with.overflow on kernel arguments with both results stored. The
; checks expect S_ADD_U64_PSEUDO after isel and s_add_u32 / s_addc_u32 in the
; final assembly.

; GCN-ISEL-LABEL: name: suaddo64
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_ADD_U64_PSEUDO

; GCN-LABEL: @suaddo64
;
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @suaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 {
  %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue { i64, i1 } %uadd, 0
  %carry = extractvalue { i64, i1 } %uadd, 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

; 64-bit uadd.with.overflow of a kernel argument and the sign-extended workitem
; id, with both results stored. The checks expect V_ADD_U64_PSEUDO after isel
; and a vector add / add-with-carry pair in the final assembly.

; GCN-ISEL-LABEL: name: vuaddo64
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: V_ADD_U64_PSEUDO

; GCN-LABEL: @vuaddo64
;
; CISI: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; CISI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; VI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX10W32: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
; GFX10W64: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0
; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
; GFX1030: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
;
; GFX11: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
; GFX11: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
define amdgpu_kernel void @vuaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %tid.ext)
  %val = extractvalue { i64, i1 } %uadd, 0
  %carry = extractvalue { i64, i1 } %uadd, 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

; 64-bit subtract of two kernel arguments. The checks expect S_SUB_U64_PSEUDO
; after isel and an s_sub_u32 / s_subb_u32 pair in the final assembly.

; GCN-ISEL-LABEL: name: ssub64rr
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: S_SUB_U64_PSEUDO

; GCN-LABEL: @ssub64rr
; GCN: s_sub_u32
; GCN: s_subb_u32
define amdgpu_kernel void @ssub64rr(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %sub = sub i64 %a, %b
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; 64-bit subtract of a kernel argument from the constant 20015998343286. The
; checks expect the constant halves as the first source operand of the
; s_sub_u32 / s_subb_u32 pair.

; GCN-ISEL-LABEL: name: ssub64ri
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: S_SUB_U64_PSEUDO

; GCN-LABEL: @ssub64ri
; GCN: s_sub_u32 s{{[0-9]+}}, 0x56789876, s{{[0-9]+}}
; GCN: s_subb_u32 s{{[0-9]+}}, 0x1234, s{{[0-9]+}}
define amdgpu_kernel void @ssub64ri(i64 addrspace(1)* %out, i64 %a) {
entry:
  %sub = sub i64 20015998343286, %a
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; 64-bit subtract of the sign-extended workitem id from a kernel argument. The
; checks expect V_SUB_U64_PSEUDO after isel and a per-subtarget vector
; subtract / subtract-with-borrow pair in the final assembly.

; GCN-ISEL-LABEL: name: vsub64rr
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: V_SUB_U64_PSEUDO

; GCN-LABEL: @vsub64rr
;
; CISI: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; CISI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX10W32: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
; GFX10W64: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v{{[0-9]+}}
; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
; GFX1030: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
;
; GFX11: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
; GFX11: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
define amdgpu_kernel void @vsub64rr(i64 addrspace(1)* %out, i64 %a) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %sub = sub i64 %a, %tid.ext
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; 64-bit subtract of the sign-extended workitem id from the constant
; 20015998343286. The checks expect V_SUB_U64_PSEUDO after isel and the
; constant halves 0x56789876 / 0x1234 in the final vector subtract sequence.

; GCN-ISEL-LABEL: name: vsub64ri
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: V_SUB_U64_PSEUDO

; GCN-LABEL: @vsub64ri
;
; CISI: v_sub_i32_e32 v0, vcc, 0x56789876, v0
; CISI: v_mov_b32_e32 v1, 0x1234
; CISI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
;
; VI: v_sub_u32_e32 v0, vcc, 0x56789876, v0
; VI: v_mov_b32_e32 v1, 0x1234
; VI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
;
; GFX9: v_sub_co_u32_e32 v0, vcc, 0x56789876, v0
; GFX9: v_mov_b32_e32 v1, 0x1234
; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
;
; GFX10W32: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
; GFX10W64: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], 0x56789876, v{{[0-9]+}}
; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0x1234, 0, [[CARRY]]
; GFX1030: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, 0x1234, 0, [[CARRY]]
;
; GFX11: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
; GFX11: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, 0x1234, 0, [[CARRY]]
define amdgpu_kernel void @vsub64ri(i64 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %sub = sub i64 20015998343286, %tid.ext
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; 32-bit usub.with.overflow on kernel arguments where the overflow bit is
; extracted but never stored. Only the isel output is checked; it is expected
; to contain S_SUB_I32.

; GCN-ISEL-LABEL: name: susubo32
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_SUB_I32
define amdgpu_kernel void @susubo32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue { i32, i1 } %usub, 0
  %carry = extractvalue { i32, i1 } %usub, 1
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}


; Same as @susubo32, except that the overflow bit is also stored to %carryout.

; GCN-ISEL-LABEL: name: usubo32_vcc_user
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: V_SUB_CO_U32_e64

; Below we check selection to v_sub/subb,
; because the only user of VCC produced by the USUBO is v_cndmask.
; We select to VALU form to avoid unnecessary s_cselect to copy SCC to VCC

; GCN-LABEL: @usubo32_vcc_user
;
; CISI: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; CISI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; GFX10W32: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX10W64: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
;
; GFX11: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX11: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
define amdgpu_kernel void @usubo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue { i32, i1 } %usub, 0
  %carry = extractvalue { i32, i1 } %usub, 1
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

; 64-bit usub.with.overflow on kernel arguments with both results stored. The
; checks expect S_SUB_U64_PSEUDO after isel and s_sub_u32 / s_subb_u32 in the
; final assembly.

; GCN-ISEL-LABEL: name: susubo64
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_SUB_U64_PSEUDO

; GCN-LABEL: @susubo64
;
; GCN: s_sub_u32
; GCN: s_subb_u32
define amdgpu_kernel void @susubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 {
  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue { i64, i1 } %usub, 0
  %carry = extractvalue { i64, i1 } %usub, 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

; 64-bit usub.with.overflow of a kernel argument and the sign-extended workitem
; id, with both results stored. The checks expect V_SUB_U64_PSEUDO after isel
; and a vector subtract / subtract-with-borrow pair in the final assembly.

; GCN-ISEL-LABEL: name: vusubo64
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: V_SUB_U64_PSEUDO

; GCN-LABEL: @vusubo64
;
; CISI: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; CISI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; VI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX10W32: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
; GFX10W64: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0
; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
; GFX1030: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
;
; GFX11: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
; GFX11: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
define amdgpu_kernel void @vusubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %tid.ext)
  %val = extractvalue { i64, i1 } %usub, 0
  %carry = extractvalue { i64, i1 } %usub, 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

; 64-bit unsigned divide of two kernel arguments. Only the isel output is
; checked: in bb.3 the carry-out defined by V_ADD_CO_U32_e64 is expected to be
; the carry-in of S_ADD_CO_PSEUDO, and the carry-out defined by
; V_SUB_CO_U32_e64 is expected to be the carry-in of S_SUB_CO_PSEUDO.

; GCN-ISEL-LABEL: name: sudiv64
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.3
; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64
; GCN-ISEL: S_ADD_CO_PSEUDO %{{[0-9]+}}, killed %{{[0-9]+}}, killed %[[CARRY]]
; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64
; GCN-ISEL: S_SUB_CO_PSEUDO killed %{{[0-9]+}}, %{{[0-9]+}}, %[[CARRY]]
define amdgpu_kernel void @sudiv64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %result = udiv i64 %x, %y
  store i64 %result, i64 addrspace(1)* %out
  ret void
}



declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1

declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1

declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #1

declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
