# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN

---
# old is undefined: only combine when masks are fully enabled and
# bound_ctrl:1 is set, otherwise the result of DPP VALU op can be undefined.
# GCN-LABEL: name: old_is_undef
# GCN: %2:vgpr_32 = IMPLICIT_DEF
# VOP2:
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
# VOP1:
# GCN: %12:vgpr_32 = V_NOT_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
# GCN: %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
# GCN: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
# GCN: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
name: old_is_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; VOP2
    ; masks fully enabled, bound_ctrl:1 on - the only variant expected to combine
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec

    ; masks fully enabled, bound_ctrl:1 off - expected to stay uncombined
    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec

    ; masks partially disabled, bound_ctrl:1 on - expected to stay uncombined
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec

    ; masks partially disabled, bound_ctrl:1 off - expected to stay uncombined
    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec

    ; VOP1
    ; same four mask/bound_ctrl variants as above, with a unary consumer
    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec

    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec

    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec

    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...

# old is zero cases:

# GCN-LABEL: name: old_is_0

# VOP2:
# case 1: old is zero, masks are fully enabled, bound_ctrl:1 is on:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range}) or active src lane result - can combine with old = undef.
# undef is preferred as it makes life easier for the regalloc.
# GCN: [[U1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %4:vgpr_32 = V_ADD_U32_dpp [[U1]], %0, %1, 1, 15, 15, 1, implicit $exec

# case 2: old is zero, masks are fully enabled, bound_ctrl:1 is off:
# as the DPP mov old is zero this case is no different from case 1 - combine it
# setting bound_ctrl:1 on for the combined DPP VALU op to make old undefined
# GCN: [[U2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %6:vgpr_32 = V_ADD_U32_dpp [[U2]], %0, %1, 1, 15, 15, 1, implicit $exec

# case 3: masks are partially disabled, bound_ctrl:1 is on:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The VALU op should have the same masks as DPP mov as they select lanes
# with identity value.
# Special case: the bound_ctrl for the combined DPP VALU op isn't important
# here but let's make it off to keep the combiner's logic simpler.
# GCN: %8:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

# case 4: masks are partially disabled, bound_ctrl:1 is off:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The VALU op should have the same masks as DPP mov as they select
# lanes with identity value
# GCN: %10:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

# VOP1:
# see case 1
# GCN: [[U3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %12:vgpr_32 = V_NOT_B32_dpp [[U3]], %0, 1, 15, 15, 1, implicit $exec
# see case 2
# GCN: [[U4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %14:vgpr_32 = V_NOT_B32_dpp [[U4]], %0, 1, 15, 15, 1, implicit $exec
# cases 3 and 4 are not applicable as there is no way to specify an unchanged
# result for the unary VALU op
# GCN: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
# GCN: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec

name: old_is_0
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    ; %2 is the zero "old" value shared by every DPP mov below
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; VOP2
    ; case 1
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec

    ; case 2
    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec

    ; case 3
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec

    ; case 4
    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec

    ; VOP1
    ; same four variants, in the same order, with a unary consumer
    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec

    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec

    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec

    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...
# old is nonzero identity cases:

# old is nonzero identity, masks are fully enabled, bound_ctrl:1 is off:
# the DPP mov result would be either identity ({src lane disabled}|{out of
# range}) or src lane result - can combine with old = src1 of the VALU op.
# The DPP VALU op should have the same masks (and bctrl) as DPP mov as they
# select lanes with identity value.

# GCN-LABEL: name: nonzero_old_is_identity_masks_enabled_bctl_off
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec

name: nonzero_old_is_identity_masks_enabled_bctl_off
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; old = 1, the identity for multiplication
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    ; old = all ones (0xFFFFFFFF), the identity for bitwise and
    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    ; old = smallest signed 32-bit value, the identity for signed max
    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    ; old = largest signed 32-bit value, the identity for signed min
    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
# old is nonzero identity, masks are partially enabled, bound_ctrl:1 is off:
# the DPP mov result would be either identity ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The DPP VALU op should have the same masks (and bctrl) as DPP mov as they
# select lanes with identity value.

# GCN-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl_off
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec

name: nonzero_old_is_identity_masks_partially_disabled_bctl_off
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; the cases alternate which of the two mask operands is partially
    ; disabled (14, 15 vs. 15, 14); each old value is the consumer's identity
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...

# old is nonzero identity, masks are partially enabled, bound_ctrl:1 is on:
# the DPP mov result may have 3 different values:
# 1. the active src lane result
# 2. 0 if the src lane is disabled|out of range
# 3. DPP mov's old value if the mov's dest VGPR write is disabled by masks
# Because values 2 and 3 differ, this can't be combined.

# GCN-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl0
# GCN: %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec

name: nonzero_old_is_identity_masks_partially_disabled_bctl0
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; same inputs as the bctl_off test above, but with the last DPP operand
    ; (bound_ctrl) set to 1; every consumer is expected to stay in e32 form
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
# When the DPP source isn't the src0 operand, the operation should be commuted
# if possible.
# GCN-LABEL: name: dpp_commute
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %16:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
name: dpp_commute
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    ; in every case below the DPP mov result is the second source operand
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec

    ; the subtraction is expected to be commuted into its SUBREV form
    %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
    %16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec

    ; this cannot be combined because immediate as src0 isn't commutable
    %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec
    %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
...

---

# check for floating point modifiers
# GCN-LABEL: name: add_f32_e64
# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec

name: add_f32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this shouldn't be combined as omod is set
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec

    ; this should be combined as all modifiers are default
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec

    ; this should be combined as modifiers other than abs|neg are default
    ; (the source modifier values 1 and 2 are carried over to the DPP op)
    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec

    ; this shouldn't be combined as modifiers aren't abs|neg
    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...
# check for e64 modifiers
# GCN-LABEL: name: add_u32_e64
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec

name: add_u32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this should be combined as all modifiers are default
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec

    ; this shouldn't be combined as clamp is set
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...

# An e64 add whose carry-out register has a user; the check expects the add
# to be left in its e64 form.
# GCN-LABEL: name: add_co_u32_e64
# GCN: %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec

name: add_co_u32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this shouldn't be combined as the carry-out is used
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec

    ; keeps the carry-out %5 alive
    S_NOP 0, implicit %5
...
# tests on sequences of dpp consumers: a single DPP mov with several users.
# The first mov (%3) has three combinable users and is expected to be folded
# into all of them; the second mov (%7) has one user that cannot be combined,
# so the mov is expected to remain.
# GCN-LABEL: name: dpp_seq
# GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec

name: dpp_seq
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; every user of %3 is combinable (the sub has %3 as its second source
    ; and is expected to be commuted into its SUBREV form)
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
    %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec
    ; this breaks the sequence
    %9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec
...
# tests on sequences of dpp consumers followed by control flow
# GCN-LABEL: name: dpp_seq_cf
# GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

name: dpp_seq_cf
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; all three users of %3 sit before the exec-modifying SI_IF below
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
    %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    %7:sreg_64 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec
    %8:sreg_64 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

  bb.2:
    SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
...

# old reg def is in diff BB - cannot combine
# GCN-LABEL: name: old_in_diff_bb
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec

name: old_in_diff_bb
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    ; the old value is defined here, in a different block than the DPP mov
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...
# old reg def is in diff BB but bound_ctrl:1 - can combine
# The old operand of the combined op is matched with a multi-digit-safe
# pattern so the check does not depend on the number of the register that
# takes the place of %2.
# GCN-LABEL: name: old_in_diff_bb_bctrl_zero
# GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]+}}, %0, %1, 1, 15, 15, 1, implicit $exec

name: old_in_diff_bb_bctrl_zero
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    ; the old value is defined here, in a different block than the DPP mov
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    ; masks fully enabled and bound_ctrl:1 on
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec

name: exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
    ; this instruction defines $exec, separating the second user of %3 from
    ; the DPP mov
    %5:sreg_64 = COPY $exec, implicit-def $exec
    %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# test if $old definition is correctly tracked through subreg manipulation pseudos

# GCN-LABEL: name: mul_old_subreg
# GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name: mul_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    ; %2 holds 1, the identity for the multiply that consumes the DPP mov
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
    %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...
# The old operand is a subregister written by INSERT_SUBREG with a zero
# value; the check expects the add to be combined with old = its src1.
# GCN-LABEL: name: add_old_subreg
# GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name: add_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# The old operand is a subregister that the REG_SEQUENCE never defines; the
# check expects the combined add to carry it as an undef old operand.
# GCN-LABEL: name: add_old_subreg_undef
# GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec

name: add_old_subreg_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# Test instruction which does not have modifiers in VOP1 form but does in DPP form.
# GCN-LABEL: name: dpp_vop1
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_vop1
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    ; the expected DPP form carries an extra 0 operand before the source
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Test instruction which does not have modifiers in VOP2 form but does in DPP form.
# GCN-LABEL: name: dpp_min
# GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_min
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    ; the expected DPP form carries an extra 0 operand before each source
    %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...

# Test an undef old operand
# GCN-LABEL: name: dpp_undef_old
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_undef_old
tracksRegLiveness: true
body: |
  bb.0:
    ; the old operand (first operand of the mov) has no definition at all
    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
  bb.0:
    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...

# Do not combine a dpp mov whose old operand is a physreg.
# GCN-LABEL: name: phys_dpp_mov_old_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Do not combine a dpp mov whose source operand is a physreg.
# GCN-LABEL: name: phys_dpp_mov_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
  bb.0:
    ; the second operand of the mov is the physical register $vgpr0
    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Two DPP movs feed a REG_SEQUENCE whose sub0/sub1 are consumed by an add /
# add-with-carry pair. Both movs use fully enabled masks with bound_ctrl:1 and
# the checks expect both consumers to become DPP ops.
# GCN-LABEL: name: dpp_reg_sequence_both_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %9:vgpr_32 = IMPLICIT_DEF
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_both_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    ; note: the register numbers in the checks differ from the ones written
    ; here (this %5 is checked as %2)
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Same shape as a REG_SEQUENCE of two DPP movs feeding an add / add-with-carry
# pair, but the sub1 mov uses masks 1, 1. The checks expect only the sub0
# consumer to be combined: the sub1 mov stays, and the REG_SEQUENCE keeps it
# while its sub0 input becomes undef.
# GCN-LABEL: name: dpp_reg_sequence_first_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_first_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; sub0 mov: masks fully enabled
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    ; sub1 mov: masks 1, 1
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# A REG_SEQUENCE of two DPP movs feeding an add / add-with-carry pair, where
# the sub0 mov uses masks 1, 1. The checks expect only the sub1 consumer to be
# combined: the sub0 mov stays, and the REG_SEQUENCE keeps it while its sub1
# input becomes undef.
# GCN-LABEL: name: dpp_reg_sequence_second_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_second_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; sub0 mov: masks 1, 1
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    ; sub1 mov: masks fully enabled
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# A REG_SEQUENCE of two DPP movs feeding an add / add-with-carry pair, where
# both movs use masks 1, 1. The checks expect every instruction to be left
# unchanged (apart from register renumbering).
# GCN-LABEL: name: dpp_reg_sequence_none_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_none_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; both movs use masks 1, 1
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Two fully-enabled DPP movs feed a REG_SEQUENCE, but the add / add-with-carry
# consumers live in a different basic block than the movs. The checks expect
# nothing to be combined.
# GCN-LABEL: name: dpp_reg_sequence_exec_changed
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    S_BRANCH %bb.1

  bb.1:
    ; the users of %4 are in a different block than the DPP movs
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# The REG_SEQUENCE built from the DPP movs is itself re-packed by a second
# REG_SEQUENCE through subregister reads before reaching the add /
# add-with-carry pair. The checks expect nothing to be combined.
# GCN-LABEL: name: dpp_reg_sequence_subreg
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    ; second REG_SEQUENCE whose inputs are subregisters of the first one
    %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
...

# A 64-bit DPP mov pseudo whose old and src operands are IMPLICIT_DEF feeds a
# 64-bit add pair. The check lines expect both halves to be combined: the low
# add becomes V_ADD_CO_U32_dpp on the sub0 halves of the pseudo's operands and
# the high add becomes V_ADDC_U32_dpp on the sub1 halves.
# The check lines number the registers differently from the input below
# (e.g. input %5 is matched as %3).
# GCN-LABEL: name: dpp64_add64_impdef
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_impdef
tracksRegLiveness: true
body: |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    %1:vreg_64 = IMPLICIT_DEF
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Same shape as dpp64_add64_impdef, but the operands of the 64-bit DPP mov
# pseudo are undef uses rather than IMPLICIT_DEF values. The check lines expect
# both halves to be combined, with the undef flag kept on the subregister
# operands of the resulting _dpp instructions.
# GCN-LABEL: name: dpp64_add64_undef
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_undef
tracksRegLiveness: true
body: |
  bb.0:
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Only the low half of the 64-bit DPP mov pseudo is combined here. The check
# lines expect:
#  - the low add to become V_ADD_CO_U32_dpp on the sub0 halves;
#  - the high user (V_ADDC_U32_e64) to stay as is and read sub1 of a
#    REG_SEQUENCE whose sub1 comes from a remaining V_MOV_B32_dpp and whose
#    sub0 is an undef register.
# The check lines number the registers differently from the input below
# (e.g. the input pseudo result %2 is matched as %0).
# GCN-LABEL: name: dpp64_add64_first_combined
# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec
name: dpp64_add64_first_combined
tracksRegLiveness: true
body: |
  bb.0:
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %3:vgpr_32, implicit-def $vcc, implicit $exec
    %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %2.sub1, undef $vcc, 0, implicit $exec
...

# The DPP mov result %3 is used by a V_CNDMASK_B32_e64 that also takes the
# condition register %4. The check line expects the V_CNDMASK_B32_e64 to be
# left unchanged, i.e. not combined with the DPP mov.
# NOTE(review): per the test name, %4 is presumably the src2 operand that
# blocks the combine - confirm against the combiner source.
# GCN-LABEL: name: dont_combine_cndmask_with_src2
# GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
name: dont_combine_cndmask_with_src2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:sreg_64_xexec = IMPLICIT_DEF
    %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
...

---

# Make sure flags aren't dropped
# The input V_ADD_F32_e64 carries the nnan and nofpexcept MI flags; the check
# line expects both flags on the combined V_ADD_F32_dpp. The flags are written
# as "nofpexcept nnan" in the input and matched as "nnan nofpexcept".
# GCN-LABEL: name: flags_add_f32_e64
# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
name: flags_add_f32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
    S_ENDPGM 0, implicit %4

...

# The DPP mov result %2 is used for both source operands of the same
# V_MAX_F32_e64. The check line expects the V_MAX_F32_e64 to be left
# unchanged, i.e. not combined with the DPP mov.
# GCN-LABEL: name: dont_combine_more_than_one_operand
# GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
name: dont_combine_more_than_one_operand
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...

# REG_SEQUENCE variant of dont_combine_more_than_one_operand: each add reads
# the same subregister of the REG_SEQUENCE (built from two DPP movs) for both
# of its source operands. The check lines expect both adds to keep their _e32
# form, i.e. not to be combined with the DPP movs.
# GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence
# GCN: %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dont_combine_more_than_one_operand_dpp_reg_sequence
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
...