# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN

# Tests for the gcn-dpp-combine pass. Each function pairs V_MOV_B32_dpp
# instructions with VALU consumers; the check lines state whether a consumer is
# expected to be rewritten into its _dpp form or left untouched.
# The target is given with -mtriple so the complete triple is explicit and does
# not inherit vendor/OS parts from the host's default triple.
# NOTE(review): the four trailing immediates of V_MOV_B32_dpp look like
# dpp_ctrl, two lane masks and bound_ctrl, judging by how the comments in this
# file describe them - confirm the exact order against the instruction
# definition.

---
# old is undefined: only combine when masks are fully enabled and
# bound_ctrl:1 is set, otherwise the result of DPP VALU op can be undefined.
# GCN-LABEL: name: old_is_undef
# GCN: %2:vgpr_32 = IMPLICIT_DEF
# VOP2:
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
# VOP1:
# GCN: %12:vgpr_32 = V_NOT_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
# GCN: %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
# GCN: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
# GCN: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
name: old_is_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; VOP2
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec

    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec

    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec

    ; VOP1
    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec

    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec

    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec

    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...

# old is zero cases:

# GCN-LABEL: name: old_is_0

# VOP2:
# case 1: old is zero, masks are fully enabled, bound_ctrl:1 is on:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range}) or active src lane result - can combine with old = undef.
# undef is preferred as it makes life easier for the regalloc.
# GCN: [[U1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %4:vgpr_32 = V_ADD_U32_dpp [[U1]], %0, %1, 1, 15, 15, 1, implicit $exec

# case 2: old is zero, masks are fully enabled, bound_ctrl:1 is off:
# as the DPP mov old is zero this case is no different from case 1 - combine it
# setting bound_ctrl:1 on for the combined DPP VALU op to make old undefined
# GCN: [[U2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %6:vgpr_32 = V_ADD_U32_dpp [[U2]], %0, %1, 1, 15, 15, 1, implicit $exec

# case 3: masks are partially disabled, bound_ctrl:1 is on:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The VALU op should have the same masks as DPP mov as they select lanes
# with identity value.
# Special case: the bound_ctrl for the combined DPP VALU op isn't important
# here but let's make it off to keep the combiner's logic simpler.
# GCN: %8:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

# case 4: masks are partially disabled, bound_ctrl:1 is off:
# the DPP mov result would be either zero ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The VALU op should have the same masks as DPP mov as they select
# lanes with identity value
# GCN: %10:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

# VOP1:
# see case 1
# GCN: [[U3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %12:vgpr_32 = V_NOT_B32_dpp [[U3]], %0, 1, 15, 15, 1, implicit $exec
# see case 2
# GCN: [[U4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
# GCN: %14:vgpr_32 = V_NOT_B32_dpp [[U4]], %0, 1, 15, 15, 1, implicit $exec
# case 3 and 4 not applicable as there is no way to specify unchanged result
# for the unary VALU op
# GCN: %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
# GCN: %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec

name: old_is_0
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    ; VOP2
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec

    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec

    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec

    ; VOP1
    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec

    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec

    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec

    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...
# old is nonzero identity cases:

# old is nonzero identity, masks are fully enabled, bound_ctrl:1 is off:
# the DPP mov result would be either identity ({src lane disabled}|{out of
# range}) or src lane result - can combine with old = src1 of the VALU op
# The DPP VALU op should have the same masks (and bctrl) as DPP mov as they
# select lanes with identity value
#
# Each "old" below is a V_MOV_B32_e32 of the identity value of its consumer:
# 1 for the multiply, 4294967295 (all bits set) for the and, -2147483648 for
# the signed max and 2147483647 for the signed min.

# GCN-LABEL: name: nonzero_old_is_identity_masks_enabled_bctl_off
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 15, 0, implicit $exec

name: nonzero_old_is_identity_masks_enabled_bctl_off
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
# old is nonzero identity, masks are partially enabled, bound_ctrl:1 is off:
# the DPP mov result would be either identity ({src lane disabled}|{src lane is
# out of range} or {the DPP mov's dest VGPR write is disabled by masks}) or
# active src lane result - can combine with old = src1 of the VALU op.
# The DPP VALU op should have the same masks (and bctrl) as DPP mov as they
# select lanes with identity value
#
# The movs alternate between mask operands 14, 15 and 15, 14, so each of the
# two mask operands is exercised with a partially disabled value; the checks
# expect the same mask values on the combined op.

# GCN-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl_off
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec

name: nonzero_old_is_identity_masks_partially_disabled_bctl_off
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...

# old is nonzero identity, masks are partially enabled, bound_ctrl:1 is on:
# the DPP mov result may have 3 different values:
#   1. the active src lane result
#   2. 0 if the src lane is disabled|out of range
#   3. DPP mov's old value if the mov's dest VGPR write is disabled by masks
# can't combine
#
# Same inputs as the previous function except that the last DPP mov operand is
# 1 instead of 0; the checks expect every consumer to stay in its e32 form.

# GCN-LABEL: name: nonzero_old_is_identity_masks_partially_disabled_bctl0
# GCN: %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec

name: nonzero_old_is_identity_masks_partially_disabled_bctl0
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
# when the DPP source isn't a src0 operand the operation should be commuted if possible
# In every pair below the DPP mov result is the second source of the consumer.
# For the subtraction the checks expect the SUBREV opcode with the operands
# swapped. The last pair has the immediate 5 as first source and is expected to
# stay uncombined.
# GCN-LABEL: name: dpp_commute
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %16:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
name: dpp_commute
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec

    %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
    %16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec

    ; this cannot be combined because immediate as src0 isn't commutable
    %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec
    %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
...

---

# check for floating point modifiers
# Four V_ADD_F32_e64 consumers that differ only in their modifier immediates;
# the per-case comments inside the body say which ones are expected to combine.
# NOTE(review): the V_ADD_F32_e64 immediates appear to be src0 modifiers,
# src1 modifiers, clamp and omod in that order - only omod (last operand) is
# identified by a comment in this file, confirm the rest.
# GCN-LABEL: name: add_f32_e64
# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec

name: add_f32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this shouldn't be combined as omod is set
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec

    ; this should be combined as all modifiers are default
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec

    ; this should be combined as modifiers other than abs|neg are default
    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec

    ; this shouldn't be combined as modifiers aren't abs|neg
    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...
# check for e64 modifiers
# The two V_ADD_U32_e64 consumers differ only in their last immediate (0 vs 1),
# which the body comments identify as clamp.
# GCN-LABEL: name: add_u32_e64
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec

name: add_u32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this should be combined as all modifiers are default
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec

    ; this shouldn't be combined as clamp is set
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...

# The e64 add below defines a carry-out (%5) that is read by the S_NOP; the
# check expects the add to remain in its e64 form.
# GCN-LABEL: name: add_co_u32_e64
# GCN: %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec

name: add_co_u32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this shouldn't be combined as the carry-out is used
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec

    S_NOP 0, implicit %5
...
# tests on sequences of dpp consumers
# The first DPP mov (%3) feeds three consumers and the checks expect all three
# to become DPP ops. The second DPP mov (%7) also feeds a subtraction whose
# first source is the immediate 5; the checks expect that mov to survive.
# GCN-LABEL: name: dpp_seq
# GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec

name: dpp_seq
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
    %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec
    ; this breaks the sequence
    %9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec
...
# tests on sequences of dpp consumers followed by control flow
# Same three consumers as the first sequence in dpp_seq, but the block ends
# with a compare, SI_IF and branch; the checks expect the same three DPP ops.
# GCN-LABEL: name: dpp_seq_cf
# GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

name: dpp_seq_cf
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
    %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    %7:sreg_64 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec
    %8:sreg_64 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

  bb.2:
    SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
...

# The old register (%2, a V_MOV_B32_e32 of 0) is defined in bb.0 while the DPP
# mov and its consumer are in bb.1. The check expects the add to be combined
# with old set to its own second source (%0) and the mov's 1, 1, 0 mask and
# bound_ctrl operands carried over.
# GCN-LABEL: name: old_in_diff_bb
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %0, %1, %0, 1, 1, 1, 0, implicit $exec

name: old_in_diff_bb
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...
# old reg def is in diff BB but bound_ctrl:1 - can combine
# The old operand of the combined op is matched with a regex instead of a fixed
# vreg number (presumably it is a newly created register, as in old_is_0
# case 1 - confirm). The pattern accepts one or more digits so it keeps
# matching if the register number ever reaches two digits.
# GCN-LABEL: name: old_in_diff_bb_bctrl_zero
# GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]+}}, %0, %1, 1, 15, 15, 1, implicit $exec

name: old_in_diff_bb_bctrl_zero
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# EXEC mask changed between def and use - cannot combine
# %3 is used once before and once after the COPY that redefines $exec; the
# check expects the DPP mov to still be present.
# GCN-LABEL: name: exec_changed
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec

name: exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
    %5:sreg_64 = COPY $exec, implicit-def $exec
    %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# test if $old definition is correctly tracked through subreg manipulation pseudos

# Here old is %5.sub0, which traces back through INSERT_SUBREG and REG_SEQUENCE
# to %2, a V_MOV_B32_e32 of 1; the check expects the multiply to be combined.
# GCN-LABEL: name: mul_old_subreg
# GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name: mul_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
    %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...
# Here old is %3.sub1, the lane written by the INSERT_SUBREG from %2 (a
# V_MOV_B32_e32 of 0); the check expects the add to be combined.
# GCN-LABEL: name: add_old_subreg
# GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name: add_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# Here old is %3.sub1, which the REG_SEQUENCE never defines; the check expects
# the combined op to carry that operand with an undef flag.
# GCN-LABEL: name: add_old_subreg_undef
# GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec

name: add_old_subreg_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# Test instruction which does not have modifiers in VOP1 form but does in DPP form.
# NOTE(review): vreg numbers in the check differ from the input (%1 in the
# input appears as %0 in the check) - presumably registers are renumbered in
# order of first appearance when the MIR is parsed; confirm.
# GCN-LABEL: name: dpp_vop1
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_vop1
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Test instruction which does not have modifiers in VOP2 form but does in DPP form.
# V_MIN_F32_e32 consumer of a DPP mov; the check expects a V_MIN_F32_dpp that
# carries a 0 immediate in front of each of its two sources.
# GCN-LABEL: name: dpp_min
# GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_min
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...

# Test an undef old operand
# GCN-LABEL: name: dpp_undef_old
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_undef_old
tracksRegLiveness: true
body: |
  bb.0:
    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which writes a physreg.
# The mov's destination is $vgpr0; the checks expect both instructions to be
# left as they are.
# GCN-LABEL: name: phys_dpp_mov_dst
# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
  bb.0:
    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which reads a physreg.
# Here the physical register $vgpr0 is the mov's old operand.
# GCN-LABEL: name: phys_dpp_mov_old_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which reads a physreg.
# Here the physical register $vgpr0 is the mov's source operand (second
# operand); the checks expect both instructions to be left as they are.
# GCN-LABEL: name: phys_dpp_mov_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Two DPP movs are joined by a REG_SEQUENCE and each half is consumed through
# a subregister of the result. Both movs use mask operands 15, 15 with the last
# operand 1; the checks expect both consumers to become DPP ops with fresh
# IMPLICIT_DEF old operands.
# NOTE(review): the constant is %5 in the input but %2 in the checks -
# presumably vregs are renumbered in order of first appearance; confirm.
# GCN-LABEL: name: dpp_reg_sequence_both_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %9:vgpr_32 = IMPLICIT_DEF
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_both_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# REG_SEQUENCE of two DPP movs where only the sub0 mov uses mask operands
# 15, 15; the sub1 mov uses 1, 1. The checks expect the sub0 consumer to become
# a DPP op, the sub1 mov and its V_ADDC_U32_e32 consumer to remain, and the
# REG_SEQUENCE to take an undef register for sub0.
# GCN-LABEL: name: dpp_reg_sequence_first_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_first_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# REG_SEQUENCE of two DPP movs where only the sub1 mov uses mask operands
# 15, 15; the sub0 mov uses 1, 1. The checks expect the sub1 consumer to become
# a DPP op, the sub0 mov and its V_ADD_CO_U32_e32 consumer to remain, and the
# REG_SEQUENCE to take an undef register for sub1.
# GCN-LABEL: name: dpp_reg_sequence_second_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_second_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# REG_SEQUENCE of two DPP movs that both use mask operands 1, 1; the checks
# expect every instruction to be left as it is (apart from vreg renumbering).
# GCN-LABEL: name: dpp_reg_sequence_none_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_none_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# The DPP movs and the REG_SEQUENCE are in bb.0 while both consumers are in
# bb.1, reached through S_BRANCH; the checks expect nothing to be combined.
# NOTE(review): the function name attributes this to a possible EXEC change
# across the block boundary - confirm against the pass.
# GCN-LABEL: name: dpp_reg_sequence_exec_changed
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    S_BRANCH %bb.1

  bb.1:
    %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...
# The REG_SEQUENCE of the two DPP movs is read only by a second REG_SEQUENCE
# (through subregisters), and the adds consume that second result; the checks
# expect nothing to be combined.
# GCN-LABEL: name: dpp_reg_sequence_subreg
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
    %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Positive test: a 64-bit V_MOV_B64_DPP_PSEUDO (both operands IMPLICIT_DEF) is
# read half by half by an add/addc pair. Both halves are expected to fold: the
# e32 adds become V_ADD_CO_U32_dpp / V_ADDC_U32_dpp that take sub0 / sub1 of the
# pseudo's two operands directly.
# GCN-LABEL: name: dpp64_add64_impdef
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_impdef
tracksRegLiveness: true
body: |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    %1:vreg_64 = IMPLICIT_DEF
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec
    ; Low half feeds the add, high half feeds the add-with-carry.
    %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Same shape as dpp64_add64_impdef, but the operands of the 64-bit DPP pseudo
# are undef uses instead of IMPLICIT_DEF results. Both adds are still expected
# to fold, with the undef flag carried over onto the sub0 / sub1 operands.
# GCN-LABEL: name: dpp64_add64_undef
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_undef
tracksRegLiveness: true
body: |
  bb.0:
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %4:vgpr_32, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...
# Partial fold of a 64-bit DPP pseudo: only the user of the low half folds.
# Expected output:
#   - the V_ADD_CO_U32_e32 on sub0 becomes V_ADD_CO_U32_dpp;
#   - the V_ADDC_U32_e64 on sub1 is kept as is;
#   - the high half it reads is produced by a 32-bit V_MOV_B32_dpp on sub1,
#     packed by a REG_SEQUENCE whose sub0 input is undef.
# GCN-LABEL: name: dpp64_add64_first_combined
# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec
name: dpp64_add64_first_combined
tracksRegLiveness: true
body: |
  bb.0:
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_CO_U32_e32 %2.sub0, undef %3:vgpr_32, implicit-def $vcc, implicit $exec
    ; e64 add-with-carry whose first source is an immediate; stays uncombined.
    %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %2.sub1, undef $vcc, 0, implicit $exec
...

# Negative test: the V_CNDMASK_B32_e64 reads the DPP mov result (%3) and takes
# its select condition from the register operand %4. The expected output still
# contains the V_CNDMASK_B32_e64 unchanged, i.e. the DPP mov is not folded in.
# GCN-LABEL: name: dont_combine_cndmask_with_src2
# GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
name: dont_combine_cndmask_with_src2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:sreg_64_xexec = IMPLICIT_DEF
    %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
...
---

# Make sure flags aren't dropped
# The V_ADD_F32_e64 carries the nnan and nofpexcept instruction flags; the
# combined V_ADD_F32_dpp is expected to carry both as well. The input spells
# them as "nofpexcept nnan" while the expected output lists them as
# "nnan nofpexcept".
# GCN-LABEL: name: flags_add_f32_e64
# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
name: flags_add_f32_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; Full row/bank masks (15, 15) with bound_ctrl set; first operand is an undef use of %2.
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
    S_ENDPGM 0, implicit %4

...

# Negative test: the DPP mov result %2 is used as both sources of the
# V_MAX_F32_e64. The expected output keeps the V_MAX_F32_e64 as is.
# GCN-LABEL: name: dont_combine_more_than_one_operand
# GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
name: dont_combine_more_than_one_operand
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...
# Negative test, REG_SEQUENCE flavour of dont_combine_more_than_one_operand:
# each add reads the same half of the REG_SEQUENCE built from two DPP movs in
# both of its sources (%4.sub0 twice, %4.sub1 twice). The expected output keeps
# the plain e32 add/addc pair.
# GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence
# GCN: %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dont_combine_more_than_one_operand_dpp_reg_sequence
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
...

# execMayBeModifiedBeforeAnyUse used to assert if the queried
# V_MOV_B32_dpp was the last instruction in the block.
---
# Loop-shaped function in which the V_MOV_B32_dpp is the final instruction of
# bb.1. The check lines below mirror the input instruction for instruction, so
# the pass is expected to leave the function unchanged.
name: mov_dpp_last_block_inst
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: mov_dpp_last_block_inst
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x80000000)
  ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr8
  ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %5, %bb.2
  ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[PHI]], 323, 15, 15, 0, implicit $exec
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF2]], implicit $exec
  ; GCN-NEXT: V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
  ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  ; GCN-NEXT: S_BRANCH %bb.3
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.3:
  ; GCN-NEXT: S_ENDPGM 0
  bb.0:
    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8

    %0:sgpr_32 = COPY $sgpr8
    %1:vgpr_32 = IMPLICIT_DEF
    %2:sreg_32 = IMPLICIT_DEF
    %3:sreg_64_xexec = IMPLICIT_DEF

  bb.1:
    ; %5 is defined by the last instruction of this block; its only reader in
    ; this function is the PHI below, via the back edge from bb.2.
    %4:vgpr_32 = PHI %1, %bb.0, %5, %bb.2
    %5:vgpr_32 = V_MOV_B32_dpp %1, %4, 323, 15, 15, 0, implicit $exec

  bb.2:
    ; Loop latch: branches back to bb.1 or leaves to bb.3 depending on VCC.
    %6:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %3, implicit $exec
    V_CMP_NE_U32_e32 1, %6, implicit-def $vcc, implicit $exec
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.3

  bb.3:
    S_ENDPGM 0

...