# Checks the machineFunctionInfo section that llc prints for AMDGPU MIR input.
# Two llc invocations consume this file with -run-pass=none: the first is
# matched against the FULL and ALL check prefixes, the second adds
# -simplify-mir and is matched against the SIMPLE and ALL check prefixes.
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -verify-machineinstrs %s -o - | FileCheck -check-prefixes=FULL,ALL %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefixes=SIMPLE,ALL %s


---
# kernel0 sets a number of machineFunctionInfo fields explicitly.
# The FULL checks also list fields that are absent from the input below
# (for example dynLDSAlign, hasSpilledSGPRs/hasSpilledVGPRs, the mode block,
# highBitsOf32BitAddress and occupancy). The SIMPLE checks expect a shorter
# list that leaves out, among others, dynLDSAlign, noSignedZerosFPMath, the
# spill flags and the mode block.
# ALL-LABEL: name: kernel0
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 128
# FULL-NEXT: maxKernArgAlign: 64
# FULL-NEXT: ldsSize: 2048
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: true
# FULL-NEXT: waveLimiter: true
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# FULL-NEXT: frameOffsetReg: '$sgpr12'
# FULL-NEXT: stackPtrOffsetReg: '$sgpr13'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: explicitKernArgSize: 128
# SIMPLE-NEXT: maxKernArgAlign: 64
# SIMPLE-NEXT: ldsSize: 2048
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: memoryBound: true
# SIMPLE-NEXT: waveLimiter: true
# SIMPLE-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# SIMPLE-NEXT: frameOffsetReg: '$sgpr12'
# SIMPLE-NEXT: stackPtrOffsetReg: '$sgpr13'
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
  explicitKernArgSize: 128
  maxKernArgAlign: 64
  ldsSize: 2048
  isEntryFunction: true
  noSignedZerosFPMath: false
  memoryBound: true
  waveLimiter: true
  scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
  frameOffsetReg: '$sgpr12'
  stackPtrOffsetReg: '$sgpr13'
  argumentInfo:
    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
    workGroupIDX: { reg: '$sgpr6' }
    privateSegmentWaveByteOffset: { reg: '$sgpr7' }
    workItemIDX: { reg: '$vgpr0' }
body: |
  bb.0:
    S_ENDPGM 0

...

# FIXME: Should be able to not print section for simple
---
# no_mfi: the function below has no machineFunctionInfo key at all. Both the
# FULL and the SIMPLE checks still expect a machineFunctionInfo section in the
# printed output; the SIMPLE list starts at maxKernArgAlign and goes straight
# on to argumentInfo.
# ALL-LABEL: name: no_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

name: no_mfi
body: |
  bb.0:
    S_ENDPGM 0

...

---
# empty_mfi: the machineFunctionInfo key is present but has no entries. The
# expected output is the same as for a function with no machineFunctionInfo
# key at all.
# ALL-LABEL: name: empty_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

name: empty_mfi
machineFunctionInfo:
body: |
  bb.0:
    S_ENDPGM 0

...

---
# empty_mfi_entry_func: only isEntryFunction is set in the input. The SIMPLE
# checks expect that one field to be printed in addition to what is expected
# for a function whose machineFunctionInfo is empty.
# ALL-LABEL: name: empty_mfi_entry_func
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

name: empty_mfi_entry_func
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    S_ENDPGM 0

...

---
# default_regs_mfi: scratchRSrcReg is written out explicitly, but with the
# same value the FULL checks of the functions above expect when it is not
# given. The FULL output must show all three register fields; the simplified
# output must show none of them. Each negative check below needs exactly one
# colon after the directive, otherwise the extra colon becomes part of the
# pattern and the check can never fire.
# ALL-LABEL: name: default_regs_mfi

# FULL: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'

# SIMPLE-NOT: scratchRSrcReg
# SIMPLE-NOT: frameOffsetReg
# SIMPLE-NOT: stackPtrOffsetReg
name: default_regs_mfi
machineFunctionInfo:
  scratchRSrcReg: '$private_rsrc_reg'

body: |
  bb.0:
    S_ENDPGM 0

...

---
# fake_stack_arginfo: gives flatScratchInit a stack offset instead of a
# register, and gives workItemIDY a hexadecimal mask. The checks expect the
# offset form to be printed back and the mask to come back in decimal
# (0xff00 is 65280).
# ALL-LABEL: name: fake_stack_arginfo

# FULL: argumentInfo:
# FULL: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL: flatScratchInit: { offset: 4 }
# FULL: workItemIDY: { reg: '$vgpr0', mask: 65280 }

# SIMPLE: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: flatScratchInit: { offset: 4 }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
name: fake_stack_arginfo
machineFunctionInfo:
  argumentInfo:
    flatScratchInit: { offset: 4 }
    workItemIDY: { reg: '$vgpr0' , mask: 0xff00 }

body: |
  bb.0:
    S_ENDPGM 0

...

---
# parse_mode: every mode flag is set to false, the opposite of the value the
# FULL checks of the earlier functions expect. Both llc invocations must print
# the mode block with all six flags false.
# ALL-LABEL: name: parse_mode
# ALL: mode:
# ALL-NEXT: ieee: false
# ALL-NEXT: dx10-clamp: false
# ALL-NEXT: fp32-input-denormals: false
# ALL-NEXT: fp32-output-denormals: false
# ALL-NEXT: fp64-fp16-input-denormals: false
# ALL-NEXT: fp64-fp16-output-denormals: false

name: parse_mode
machineFunctionInfo:
  mode:
    ieee: false
    dx10-clamp: false
    fp32-input-denormals: false
    fp32-output-denormals: false
    fp64-fp16-input-denormals: false
    fp64-fp16-output-denormals: false

body: |
  bb.0:
    S_ENDPGM 0

...


---
# parse_spilled_regs: both spill flags are set to true and must be printed
# back, on adjacent lines, by both llc invocations.
# ALL-LABEL: name: parse_spilled_regs
# ALL: machineFunctionInfo:
# ALL: hasSpilledSGPRs: true
# ALL-NEXT: hasSpilledVGPRs: true

name: parse_spilled_regs
machineFunctionInfo:
  hasSpilledSGPRs: true
  hasSpilledVGPRs: true

body: |
  bb.0:
    S_ENDPGM 0

...

---
# dyn_lds_with_alignment: dynLDSAlign is set to 8. The FULL output must show
# it directly after ldsSize, and the simplified output must still print it.
# ALL-LABEL: name: dyn_lds_with_alignment

# FULL: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 8

# SIMPLE: dynLDSAlign: 8
name: dyn_lds_with_alignment
machineFunctionInfo:
  dynLDSAlign: 8

body: |
  bb.0:
    S_ENDPGM 0

...

---
# occupancy_0: the input asks for occupancy 0, and the checks expect 10 to be
# printed instead.
# ALL-LABEL: name: occupancy_0
# ALL: occupancy: 10
name: occupancy_0
machineFunctionInfo:
  occupancy: 0

body: |
  bb.0:
    S_ENDPGM 0

...

---
# occupancy_3: an occupancy of 3 in the input is expected to be printed back
# unchanged.
# ALL-LABEL: name: occupancy_3
# ALL: occupancy: 3
name: occupancy_3
machineFunctionInfo:
  occupancy: 3

body: |
  bb.0:
    S_ENDPGM 0

...

---
# scavenge_fi: declares one 4-byte spill-slot stack object and names it as
# scavengeFI. Both llc invocations must print the same '%stack.0' reference.
# ALL-LABEL: name: scavenge_fi
# ALL: scavengeFI: '%stack.0'
name: scavenge_fi
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
machineFunctionInfo:
  scavengeFI: '%stack.0'

body: |
  bb.0:
    S_ENDPGM 0

...