1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx1010 -stop-after=greedy < %s | FileCheck %s
3
4%llpc.array.element = type <{ i32, [12 x i8] }>
5%llpc.array.element.2 = type <{ i32, [12 x i8] }>
6%llpc.array.element.5 = type <{ i32, [12 x i8] }>
7
8define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x i32> inreg %userData) {
9  ; CHECK-LABEL: name: _amdgpu_gs_main
10  ; CHECK: bb.0..expVert:
11  ; CHECK:   liveins: $sgpr3, $sgpr4, $sgpr5, $sgpr8, $sgpr9, $sgpr10, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr25, $sgpr27, $sgpr31
12  ; CHECK:   undef %56.sub0:sgpr_64 = COPY $sgpr31
13  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr27
14  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr25
15  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr5
16  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr4
17  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr3
18  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr18
19  ; CHECK:   undef %50.sub0:sgpr_64 = COPY $sgpr19
20  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr20
21  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr21
22  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr22
23  ; CHECK:   [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23
24  ; CHECK:   [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr9
25  ; CHECK:   [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr10
26  ; CHECK:   [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr8
27  ; CHECK:   undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0, 0 :: (load 8 from %ir.40, addrspace 4)
28  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
29  ; CHECK:   [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc
30  ; CHECK:   [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
31  ; CHECK:   [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 4, implicit-def dead $scc
32  ; CHECK:   [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc
33  ; CHECK:   [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc
34  ; CHECK:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc
35  ; CHECK:   [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc
36  ; CHECK:   undef %130.sub0:sreg_64 = S_ADD_U32 [[COPY5]], [[S_LSHL_B32_2]], implicit-def $scc
37  ; CHECK:   %130.sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
38  ; CHECK:   early-clobber %73:sgpr_128, early-clobber %143:sgpr_128, early-clobber %131:sreg_32_xm0_xexec = BUNDLE %130, undef %132:sgpr_128, undef %74:sreg_64 {
39  ; CHECK:     [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0, 0 :: (load 16 from %ir.84, addrspace 4)
40  ; CHECK:     [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0, 0 :: (load 16 from `<4 x i32> addrspace(4)* undef`, addrspace 4)
41  ; CHECK:     [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0, 0 :: (dereferenceable invariant load 4)
42  ; CHECK:   }
43  ; CHECK:   [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0, 0 :: (dereferenceable invariant load 4)
44  ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
45  ; CHECK:   %71.sub3:sgpr_128 = S_MOV_B32 553734060
46  ; CHECK:   %71.sub2:sgpr_128 = S_MOV_B32 -1
47  ; CHECK:   early-clobber %87:vgpr_32, early-clobber %117:vgpr_32, early-clobber %76:vgpr_32 = BUNDLE [[S_LOAD_DWORDX4_IMM1]], undef %118:sgpr_128, undef %89:sgpr_128, [[V_MOV_B32_e32_]], implicit $exec {
48  ; CHECK:     [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
49  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
50  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
51  ; CHECK:   }
52  ; CHECK:   [[COPY13:%[0-9]+]]:sgpr_128 = COPY %71
53  ; CHECK:   %71.sub1:sgpr_128 = S_MOV_B32 0
54  ; CHECK:   [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc
55  ; CHECK:   [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc
56  ; CHECK:   [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc
57  ; CHECK:   [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], 64, implicit-def $scc
58  ; CHECK:   [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %54:sreg_32, 0, implicit-def dead $scc, implicit $scc
59  ; CHECK:   undef %149.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_]], implicit-def $scc
60  ; CHECK:   %149.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
61  ; CHECK:   undef %156.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_1]], implicit-def $scc
62  ; CHECK:   %156.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
63  ; CHECK:   undef %163.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_2]], implicit-def $scc
64  ; CHECK:   early-clobber %150:sgpr_128, early-clobber %157:sgpr_128 = BUNDLE %149, %156 {
65  ; CHECK:     [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0, 0 :: (load 16 from %ir.91, addrspace 4)
66  ; CHECK:     [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0, 0 :: (load 16 from %ir.97, addrspace 4)
67  ; CHECK:   }
68  ; CHECK:   %163.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
69  ; CHECK:   [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %171:sreg_32, 31, implicit-def dead $scc
70  ; CHECK:   undef %176.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], undef %171:sreg_32, implicit-def $scc
71  ; CHECK:   [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0, 0 :: (load 16 from %ir.103, addrspace 4)
72  ; CHECK:   %176.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
73  ; CHECK:   undef %183.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_]], implicit-def $scc
74  ; CHECK:   %183.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
75  ; CHECK:   undef %190.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_1]], implicit-def $scc
76  ; CHECK:   %190.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
77  ; CHECK:   undef %200.sub0:sreg_64 = S_ADD_U32 %50.sub0, undef %171:sreg_32, implicit-def $scc
78  ; CHECK:   %200.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
79  ; CHECK:   [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 %50.sub0, 224, implicit-def $scc
80  ; CHECK:   [[S_ADDC_U32_1:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc
81  ; CHECK:   undef %210.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_]], implicit-def $scc
82  ; CHECK:   %210.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
83  ; CHECK:   undef %217.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_1]], implicit-def $scc
84  ; CHECK:   %217.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
85  ; CHECK:   undef %224.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_2]], implicit-def $scc
86  ; CHECK:   %224.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
87  ; CHECK:   [[S_ADD_U32_2:%[0-9]+]]:sreg_32 = S_ADD_U32 %50.sub0, 576, implicit-def $scc
88  ; CHECK:   [[S_ADDC_U32_2:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc
89  ; CHECK:   undef %241.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], [[S_LSHL_B32_]], implicit-def $scc
90  ; CHECK:   %241.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
91  ; CHECK:   undef %253.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], [[S_LSHL_B32_2]], implicit-def $scc
92  ; CHECK:   %253.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
93  ; CHECK:   undef %261.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], undef %171:sreg_32, implicit-def $scc
94  ; CHECK:   %261.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
95  ; CHECK:   undef %273.sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc
96  ; CHECK:   %273.sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
97  ; CHECK:   undef %286.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_1]], implicit-def $scc
98  ; CHECK:   %286.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
99  ; CHECK:   undef %293.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_2]], implicit-def $scc
100  ; CHECK:   %293.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
101  ; CHECK:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
102  ; CHECK:   [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
103  ; CHECK:   early-clobber %379:sreg_32_xm0_xexec, early-clobber %201:sgpr_128, early-clobber %177:sgpr_128, early-clobber %184:sgpr_128, early-clobber %319:sreg_32_xm0_xexec, early-clobber %191:sgpr_128, early-clobber %309:sreg_32_xm0_xexec, early-clobber %323:sreg_32_xm0_xexec, early-clobber %368:sreg_32_xm0_xexec, early-clobber %313:sreg_32_xm0_xexec, early-clobber %211:sgpr_128 = BUNDLE [[S_ADD_I32_]], %71, undef %369:sgpr_128, %210, undef %314:sreg_32, %200, undef %380:sgpr_128, %176, %183, [[S_ADD_I32_1]], %190, undef %370:sreg_32 {
104  ; CHECK:     [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load 4)
105  ; CHECK:     [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, undef %314:sreg_32, 0, 0 :: (dereferenceable invariant load 4)
106  ; CHECK:     [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load 4)
107  ; CHECK:     [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 16, 0, 0 :: (dereferenceable invariant load 4)
108  ; CHECK:     [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %369:sgpr_128, undef %370:sreg_32, 0, 0 :: (dereferenceable invariant load 4)
109  ; CHECK:     [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0, 0 :: (dereferenceable invariant load 4)
110  ; CHECK:     [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0, 0 :: (load 16 from %ir.111, addrspace 4)
111  ; CHECK:     [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0, 0 :: (load 16 from %ir.117, addrspace 4)
112  ; CHECK:     [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0, 0 :: (load 16 from %ir.123, addrspace 4)
113  ; CHECK:     [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0, 0 :: (load 16 from %ir.131, addrspace 4)
114  ; CHECK:     [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0, 0 :: (load 16 from %ir.138, addrspace 4)
115  ; CHECK:   }
116  ; CHECK:   early-clobber %151:vgpr_32, early-clobber %158:vgpr_32, early-clobber %165:vgpr_32 = BUNDLE [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], [[S_LOAD_DWORDX4_IMM3]], [[S_LOAD_DWORDX4_IMM4]], implicit $exec {
117  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
118  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
119  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
120  ; CHECK:   }
121  ; CHECK:   early-clobber %374:sreg_32_xm0_xexec, early-clobber %363:sreg_32_xm0_xexec = BUNDLE [[S_ADD_I32_]], undef %364:sgpr_128, undef %375:sgpr_128, [[S_ADD_I32_1]] {
122  ; CHECK:     [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %364:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load 4)
123  ; CHECK:     [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %375:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load 4)
124  ; CHECK:   }
125  ; CHECK:   [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR]], -98, implicit-def dead $scc
126  ; CHECK:   [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR1]], -114, implicit-def dead $scc
127  ; CHECK:   [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR2]], -130, implicit-def dead $scc
128  ; CHECK:   [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc
129  ; CHECK:   undef %327.sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
130  ; CHECK:   %327.sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
131  ; CHECK:   undef %335.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_]], implicit-def $scc
132  ; CHECK:   %335.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
133  ; CHECK:   undef %343.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
134  ; CHECK:   %343.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
135  ; CHECK:   undef %351.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
136  ; CHECK:   %351.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
137  ; CHECK:   [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY10]], 4, implicit-def dead $scc
138  ; CHECK:   [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
139  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
140  ; CHECK:   [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %396:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load 4)
141  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
142  ; CHECK:   early-clobber %218:sgpr_128, early-clobber %225:sgpr_128, early-clobber %231:sgpr_128 = BUNDLE %217, %224, %50 {
143  ; CHECK:     [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0, 0 :: (load 16 from %ir.155, addrspace 4)
144  ; CHECK:     [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0, 0 :: (load 16 from %ir.144, addrspace 4)
145  ; CHECK:     [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0, 0 :: (load 16 from %ir.150, addrspace 4)
146  ; CHECK:   }
147  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
148  ; CHECK:   early-clobber %254:sgpr_128, early-clobber %242:sgpr_128 = BUNDLE %253, %241 {
149  ; CHECK:     [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0, 0 :: (load 16 from %ir.162, addrspace 4)
150  ; CHECK:     [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0, 0 :: (load 16 from %ir.170, addrspace 4)
151  ; CHECK:   }
152  ; CHECK:   early-clobber %212:vgpr_32, early-clobber %202:vgpr_32 = BUNDLE [[S_LOAD_DWORDX4_IMM8]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], implicit $exec {
153  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
154  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
155  ; CHECK:   }
156  ; CHECK:   [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR4]], -217, implicit-def dead $scc
157  ; CHECK:   [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -233, implicit-def dead $scc
158  ; CHECK:   [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR5]], -249, implicit-def dead $scc
159  ; CHECK:   [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc
160  ; CHECK:   [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -313, implicit-def dead $scc
161  ; CHECK:   [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -329, implicit-def dead $scc
162  ; CHECK:   [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -345, implicit-def dead $scc
163  ; CHECK:   [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR6]], -441, implicit-def dead $scc
164  ; CHECK:   [[S_ADD_U32_3:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], 160, implicit-def $scc
165  ; CHECK:   [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %36:sreg_32, 0, implicit-def dead $scc, implicit $scc
166  ; CHECK:   undef %411.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_2]], implicit-def $scc
167  ; CHECK:   %411.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
168  ; CHECK:   [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 4, implicit-def dead $scc
169  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
170  ; CHECK:   [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc
171  ; CHECK:   undef %425.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_4]], implicit-def $scc
172  ; CHECK:   %425.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
173  ; CHECK:   [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 %56.sub0, 168, implicit-def $scc
174  ; CHECK:   [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc
175  ; CHECK:   [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc
176  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
177  ; CHECK:   [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc
178  ; CHECK:   undef %441.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_5]], implicit-def $scc
179  ; CHECK:   %441.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
180  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
181  ; CHECK:   early-clobber %71.sub0:sgpr_128, early-clobber %262:sgpr_128 = BUNDLE %261, %441 {
182  ; CHECK:     internal %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %441, 0, 0, 0 :: (load 4 from %ir..i085.i, align 8, addrspace 4)
183  ; CHECK:     [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0, 0 :: (load 16 from %ir.176, addrspace 4)
184  ; CHECK:   }
185  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
186  ; CHECK:   [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0, 0 :: (load 16 from %ir.185, addrspace 4)
187  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
188  ; CHECK:   early-clobber %445:sreg_32_xm0_xexec, early-clobber %287:sgpr_128 = BUNDLE %71, %286 {
189  ; CHECK:     [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0, 0 :: (load 16 from %ir.194, addrspace 4)
190  ; CHECK:     [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
191  ; CHECK:   }
192  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
193  ; CHECK:   [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0, 0 :: (load 16 from %ir.200, addrspace 4)
194  ; CHECK:   early-clobber %281:vgpr_32, early-clobber %275:vgpr_32 = BUNDLE [[S_LOAD_DWORDX4_IMM1]], [[S_LOAD_DWORDX4_IMM16]], [[V_MOV_B32_e32_]], implicit $exec {
195  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
196  ; CHECK:     [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
197  ; CHECK:   }
198  ; CHECK:   [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc
199  ; CHECK:   [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
200  ; CHECK:   [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc
201  ; CHECK:   [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
202  ; CHECK:   undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc
203  ; CHECK:   %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
204  ; CHECK:   %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0, 0 :: (load 8 from %ir.304, addrspace 4)
205  ; CHECK:   [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
206  ; CHECK:   early-clobber %336:sgpr_128, early-clobber %352:sgpr_128, early-clobber %328:sgpr_128, early-clobber %344:sgpr_128 = BUNDLE %327, %343, %335, %351 {
207  ; CHECK:     [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0, 0 :: (load 16 from %ir.223, addrspace 4)
208  ; CHECK:     [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0, 0 :: (load 16 from %ir.230, addrspace 4)
209  ; CHECK:     [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0, 0 :: (load 16 from %ir.236, addrspace 4)
210  ; CHECK:     [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0, 0 :: (load 16 from %ir.242, addrspace 4)
211  ; CHECK:   }
212  ; CHECK:   %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc
213  ; CHECK:   [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
214  ; CHECK:   early-clobber %329:vgpr_32, early-clobber %345:vgpr_32, early-clobber %337:vgpr_32 = BUNDLE [[S_LOAD_DWORDX4_IMM20]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], [[S_LOAD_DWORDX4_IMM21]], implicit $exec {
215  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
216  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
217  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
218  ; CHECK:   }
219  ; CHECK:   [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 3, implicit-def dead $scc
220  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
221  ; CHECK:   [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc
222  ; CHECK:   [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
223  ; CHECK:   undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc
224  ; CHECK:   %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
225  ; CHECK:   %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0, 0 :: (load 8 from %ir.316, addrspace 4)
226  ; CHECK:   %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc
227  ; CHECK:   early-clobber %412:sgpr_128, early-clobber %487:sreg_32_xm0_xexec, early-clobber %475:sreg_32_xm0_xexec = BUNDLE %71, undef %488:sreg_64, %411 {
228  ; CHECK:     [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
229  ; CHECK:     [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0, 0 :: (load 16 from %ir.278, addrspace 4)
230  ; CHECK:     [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0, 0 :: (load 4 from `i32 addrspace(4)* undef`, addrspace 4)
231  ; CHECK:   }
232  ; CHECK:   [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 3, implicit-def dead $scc
233  ; CHECK:   [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0, 0 :: (load 16 from %ir.287, addrspace 4)
234  ; CHECK:   [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
235  ; CHECK:   [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
236  ; CHECK:   undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc
237  ; CHECK:   %485.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
238  ; CHECK:   %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %485, 0, 0, 0 :: (load 4 from %ir..i0100.i, align 8, addrspace 4)
239  ; CHECK:   early-clobber %413:vgpr_32, early-clobber %427:vgpr_32 = BUNDLE [[S_LOAD_DWORDX4_IMM23]], [[S_LOAD_DWORDX4_IMM24]], [[V_MOV_B32_e32_]], implicit $exec {
240  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
241  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
242  ; CHECK:   }
243  ; CHECK:   %71.sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_]], implicit-def dead $scc
244  ; CHECK:   [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
245  ; CHECK:   [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc
246  ; CHECK:   [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -475, implicit-def dead $scc
247  ; CHECK:   [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -491, implicit-def dead $scc
248  ; CHECK:   [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -507, implicit-def dead $scc
249  ; CHECK:   [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -539, implicit-def dead $scc
250  ; CHECK:   [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc
251  ; CHECK:   [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], 96, implicit-def $scc
252  ; CHECK:   [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc
253  ; CHECK:   undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc
254  ; CHECK:   %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
255  ; CHECK:   [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0, 0 :: (load 16 from %ir.347, addrspace 4)
256  ; CHECK:   undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc
257  ; CHECK:   %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
258  ; CHECK:   [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0, 0 :: (load 16 from %ir.353, addrspace 4)
259  ; CHECK:   undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc
260  ; CHECK:   %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
261  ; CHECK:   [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0, 0 :: (load 16 from %ir.359, addrspace 4)
262  ; CHECK:   early-clobber %516:vgpr_32, early-clobber %532:vgpr_32, early-clobber %524:vgpr_32 = BUNDLE [[S_LOAD_DWORDX4_IMM26]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], [[S_LOAD_DWORDX4_IMM27]], implicit $exec {
263  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
264  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
265  ; CHECK:     [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
266  ; CHECK:   }
267  ; CHECK:   [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
268  ; CHECK:   [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], implicit $exec
269  ; CHECK:   [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
270  ; CHECK:   [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]], implicit $exec
271  ; CHECK:   [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -4, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
272  ; CHECK:   [[V_OR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_]], [[V_ADD_U32_e32_2]], implicit $exec
273  ; CHECK:   [[V_SUBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 27, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
274  ; CHECK:   [[V_OR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_1]], [[V_ADD_U32_e32_3]], implicit $exec
275  ; CHECK:   [[V_SUBREV_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 28, [[BUFFER_LOAD_DWORD_OFFSET]], implicit $exec
276  ; CHECK:   [[V_OR_B32_e32_3:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_2]], [[V_SUBREV_U32_e32_]], implicit $exec
277  ; CHECK:   [[V_OR_B32_e32_4:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_3]], [[V_SUBREV_U32_e32_1]], implicit $exec
278  ; CHECK:   [[V_OR_B32_e32_5:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_SUB_I32_]], [[V_OR_B32_e32_4]], implicit $exec
279  ; CHECK:   [[V_OR_B32_e32_6:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_SUB_I32_1]], [[V_OR_B32_e32_5]], implicit $exec
280  ; CHECK:   [[V_OR_B32_e32_7:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_SUB_I32_2]], [[V_OR_B32_e32_6]], implicit $exec
281  ; CHECK:   [[V_SUBREV_U32_e32_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 32, [[BUFFER_LOAD_FORMAT_X_IDXEN2]], implicit $exec
282  ; CHECK:   [[V_SUBREV_U32_e32_3:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 33, [[BUFFER_LOAD_FORMAT_X_IDXEN3]], implicit $exec
283  ; CHECK:   [[V_OR_B32_e32_8:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_7]], [[V_SUBREV_U32_e32_2]], implicit $exec
284  ; CHECK:   [[V_SUBREV_U32_e32_4:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 34, [[BUFFER_LOAD_FORMAT_X_IDXEN4]], implicit $exec
285  ; CHECK:   [[V_OR_B32_e32_9:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_8]], [[V_SUBREV_U32_e32_3]], implicit $exec
286  ; CHECK:   [[V_SUBREV_U32_e32_5:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 36, [[BUFFER_LOAD_FORMAT_X_IDXEN5]], implicit $exec
287  ; CHECK:   [[V_OR_B32_e32_10:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_9]], [[V_SUBREV_U32_e32_4]], implicit $exec
288  ; CHECK:   [[V_SUBREV_U32_e32_6:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 37, [[BUFFER_LOAD_FORMAT_X_IDXEN6]], implicit $exec
289  ; CHECK:   [[V_OR_B32_e32_11:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_10]], [[V_SUBREV_U32_e32_5]], implicit $exec
290  ; CHECK:   [[V_SUBREV_U32_e32_7:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 38, [[BUFFER_LOAD_FORMAT_X_IDXEN7]], implicit $exec
291  ; CHECK:   [[V_OR_B32_e32_12:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_11]], [[V_SUBREV_U32_e32_6]], implicit $exec
292  ; CHECK:   [[V_SUBREV_U32_e32_8:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 39, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
293  ; CHECK:   [[V_OR_B32_e32_13:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_12]], [[V_SUBREV_U32_e32_7]], implicit $exec
294  ; CHECK:   [[V_SUBREV_U32_e32_9:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 50, [[BUFFER_LOAD_FORMAT_X_IDXEN8]], implicit $exec
295  ; CHECK:   [[V_OR_B32_e32_14:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_13]], [[V_SUBREV_U32_e32_8]], implicit $exec
296  ; CHECK:   [[V_SUBREV_U32_e32_10:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 51, [[BUFFER_LOAD_FORMAT_X_IDXEN9]], implicit $exec
297  ; CHECK:   [[V_OR_B32_e32_15:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_14]], [[V_SUBREV_U32_e32_9]], implicit $exec
298  ; CHECK:   [[V_SUBREV_U32_e32_11:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 52, [[BUFFER_LOAD_FORMAT_X_IDXEN10]], implicit $exec
299  ; CHECK:   [[V_OR_B32_e32_16:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_15]], [[V_SUBREV_U32_e32_10]], implicit $exec
300  ; CHECK:   [[V_SUBREV_U32_e32_12:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 53, [[BUFFER_LOAD_FORMAT_X_IDXEN11]], implicit $exec
301  ; CHECK:   [[V_OR_B32_e32_17:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_16]], [[V_SUBREV_U32_e32_11]], implicit $exec
302  ; CHECK:   [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -72, [[BUFFER_LOAD_FORMAT_X_IDXEN12]], implicit $exec
303  ; CHECK:   [[V_OR_B32_e32_18:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_17]], [[V_SUBREV_U32_e32_12]], implicit $exec
304  ; CHECK:   [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -73, [[BUFFER_LOAD_FORMAT_X_IDXEN13]], implicit $exec
305  ; CHECK:   [[V_OR_B32_e32_19:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_18]], [[V_ADD_U32_e32_4]], implicit $exec
306  ; CHECK:   [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -74, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
307  ; CHECK:   [[V_OR_B32_e32_20:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_19]], [[V_ADD_U32_e32_5]], implicit $exec
308  ; CHECK:   [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -75, [[BUFFER_LOAD_FORMAT_X_IDXEN14]], implicit $exec
309  ; CHECK:   [[V_OR_B32_e32_21:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_20]], [[V_ADD_U32_e32_6]], implicit $exec
310  ; CHECK:   [[V_ADD_U32_e32_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -77, [[BUFFER_LOAD_FORMAT_X_IDXEN15]], implicit $exec
311  ; CHECK:   [[V_OR_B32_e32_22:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_21]], [[V_ADD_U32_e32_7]], implicit $exec
312  ; CHECK:   [[V_ADD_U32_e32_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -93, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
313  ; CHECK:   [[V_OR_B32_e32_23:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_22]], [[V_ADD_U32_e32_8]], implicit $exec
314  ; CHECK:   [[V_ADD_U32_e32_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -94, [[BUFFER_LOAD_FORMAT_X_IDXEN16]], implicit $exec
315  ; CHECK:   [[V_OR_B32_e32_24:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_23]], [[V_ADD_U32_e32_9]], implicit $exec
316  ; CHECK:   [[V_ADD_U32_e32_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -95, [[BUFFER_LOAD_DWORD_OFFSET1]], implicit $exec
317  ; CHECK:   [[V_OR_B32_e32_25:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_24]], [[V_ADD_U32_e32_10]], implicit $exec
318  ; CHECK:   [[V_ADD_U32_e32_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -96, [[BUFFER_LOAD_DWORD_OFFSET2]], implicit $exec
319  ; CHECK:   [[V_OR_B32_e32_26:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_25]], [[V_ADD_U32_e32_11]], implicit $exec
320  ; CHECK:   [[V_ADD_U32_e32_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -97, [[BUFFER_LOAD_DWORD_OFFSET3]], implicit $exec
321  ; CHECK:   [[V_OR_B32_e32_27:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_26]], [[V_ADD_U32_e32_12]], implicit $exec
322  ; CHECK:   [[V_OR_B32_e32_28:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_27]], [[V_ADD_U32_e32_13]], implicit $exec
323  ; CHECK:   [[V_OR_B32_e32_29:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_2]], [[V_OR_B32_e32_28]], implicit $exec
324  ; CHECK:   [[V_OR_B32_e32_30:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_3]], [[V_OR_B32_e32_29]], implicit $exec
325  ; CHECK:   [[V_OR_B32_e32_31:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_4]], [[V_OR_B32_e32_30]], implicit $exec
326  ; CHECK:   [[V_ADD_U32_e32_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -194, [[BUFFER_LOAD_FORMAT_X_IDXEN17]], implicit $exec
327  ; CHECK:   [[V_OR_B32_e32_32:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_5]], [[V_OR_B32_e32_31]], implicit $exec
328  ; CHECK:   [[V_ADD_U32_e32_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -195, [[BUFFER_LOAD_FORMAT_X_IDXEN18]], implicit $exec
329  ; CHECK:   [[V_OR_B32_e32_33:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_32]], [[V_ADD_U32_e32_14]], implicit $exec
330  ; CHECK:   [[V_ADD_U32_e32_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -196, [[BUFFER_LOAD_FORMAT_X_IDXEN19]], implicit $exec
331  ; CHECK:   [[V_OR_B32_e32_34:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_33]], [[V_ADD_U32_e32_15]], implicit $exec
332  ; CHECK:   [[V_ADD_U32_e32_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -197, [[BUFFER_LOAD_FORMAT_X_IDXEN20]], implicit $exec
333  ; CHECK:   [[V_OR_B32_e32_35:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_34]], [[V_ADD_U32_e32_16]], implicit $exec
334  ; CHECK:   [[V_ADD_U32_e32_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -216, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
335  ; CHECK:   [[V_OR_B32_e32_36:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_35]], [[V_ADD_U32_e32_17]], implicit $exec
336  ; CHECK:   [[V_OR_B32_e32_37:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_36]], [[V_ADD_U32_e32_18]], implicit $exec
337  ; CHECK:   [[V_OR_B32_e32_38:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_7]], [[V_OR_B32_e32_37]], implicit $exec
338  ; CHECK:   [[V_OR_B32_e32_39:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_8]], [[V_OR_B32_e32_38]], implicit $exec
339  ; CHECK:   [[V_OR_B32_e32_40:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_9]], [[V_OR_B32_e32_39]], implicit $exec
340  ; CHECK:   [[V_OR_B32_e32_41:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_10]], [[V_OR_B32_e32_40]], implicit $exec
341  ; CHECK:   [[V_OR_B32_e32_42:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_11]], [[V_OR_B32_e32_41]], implicit $exec
342  ; CHECK:   [[V_OR_B32_e32_43:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_12]], [[V_OR_B32_e32_42]], implicit $exec
343  ; CHECK:   [[V_OR_B32_e32_44:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_13]], [[V_OR_B32_e32_43]], implicit $exec
344  ; CHECK:   [[V_ADD_U32_e32_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -457, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
345  ; CHECK:   [[V_OR_B32_e32_45:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_14]], [[V_OR_B32_e32_44]], implicit $exec
346  ; CHECK:   [[V_ADD_U32_e32_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -458, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
347  ; CHECK:   [[V_OR_B32_e32_46:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_45]], [[V_ADD_U32_e32_19]], implicit $exec
348  ; CHECK:   [[V_ADD_U32_e32_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -459, [[BUFFER_LOAD_FORMAT_X_IDXEN21]], implicit $exec
349  ; CHECK:   [[V_OR_B32_e32_47:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_46]], [[V_ADD_U32_e32_20]], implicit $exec
350  ; CHECK:   [[V_ADD_U32_e32_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -466, [[BUFFER_LOAD_FORMAT_X_IDXEN22]], implicit $exec
351  ; CHECK:   [[V_OR_B32_e32_48:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_47]], [[V_ADD_U32_e32_21]], implicit $exec
352  ; CHECK:   [[V_OR_B32_e32_49:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_48]], [[V_ADD_U32_e32_22]], implicit $exec
353  ; CHECK:   [[V_OR_B32_e32_50:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_15]], [[V_OR_B32_e32_49]], implicit $exec
354  ; CHECK:   [[V_OR_B32_e32_51:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_16]], [[V_OR_B32_e32_50]], implicit $exec
355  ; CHECK:   [[V_OR_B32_e32_52:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_17]], [[V_OR_B32_e32_51]], implicit $exec
356  ; CHECK:   [[V_OR_B32_e32_53:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_23]], [[V_OR_B32_e32_52]], implicit $exec
357  ; CHECK:   [[V_OR_B32_e32_54:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_18]], [[V_OR_B32_e32_53]], implicit $exec
358  ; CHECK:   [[V_OR_B32_e32_55:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_19]], [[V_OR_B32_e32_54]], implicit $exec
359  ; CHECK:   [[V_OR_B32_e32_56:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_20]], [[V_OR_B32_e32_55]], implicit $exec
360  ; CHECK:   [[V_OR_B32_e32_57:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_21]], [[V_OR_B32_e32_56]], implicit $exec
361  ; CHECK:   [[V_OR_B32_e32_58:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_22]], [[V_OR_B32_e32_57]], implicit $exec
362  ; CHECK:   [[V_ADD_U32_e32_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -555, [[BUFFER_LOAD_FORMAT_X_IDXEN23]], implicit $exec
363  ; CHECK:   [[V_ADD_U32_e32_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -556, [[BUFFER_LOAD_FORMAT_X_IDXEN24]], implicit $exec
364  ; CHECK:   [[V_OR_B32_e32_59:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_58]], [[V_ADD_U32_e32_23]], implicit $exec
365  ; CHECK:   [[V_ADD_U32_e32_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -557, [[BUFFER_LOAD_FORMAT_X_IDXEN25]], implicit $exec
366  ; CHECK:   [[V_OR_B32_e32_60:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_59]], [[V_ADD_U32_e32_24]], implicit $exec
367  ; CHECK:   [[V_ADD_U32_e32_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -574, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
368  ; CHECK:   [[V_OR_B32_e32_61:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_60]], [[V_ADD_U32_e32_25]], implicit $exec
369  ; CHECK:   [[V_ADD_U32_e32_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -575, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
370  ; CHECK:   [[V_OR_B32_e32_62:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_61]], [[V_ADD_U32_e32_26]], implicit $exec
371  ; CHECK:   [[COPY13]].sub1:sgpr_128 = COPY [[S_AND_B32_]]
372  ; CHECK:   [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY13]], 0, 0, 0 :: (dereferenceable invariant load 4)
373  ; CHECK:   [[V_ADD_U32_e32_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -576, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
374  ; CHECK:   [[V_OR_B32_e32_63:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_62]], [[V_ADD_U32_e32_27]], implicit $exec
375  ; CHECK:   [[V_ADD_U32_e32_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -577, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
376  ; CHECK:   [[V_OR_B32_e32_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_63]], [[V_ADD_U32_e32_28]], implicit $exec
377  ; CHECK:   [[V_ADD_U32_e32_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
378  ; CHECK:   [[V_OR_B32_e32_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_64]], [[V_ADD_U32_e32_29]], implicit $exec
379  ; CHECK:   [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0, 0 :: (load 32 from `<8 x i32> addrspace(4)* undef`, addrspace 4)
380  ; CHECK:   [[V_OR_B32_e32_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_65]], [[V_ADD_U32_e32_30]], implicit $exec
381  ; CHECK:   [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
382  ; CHECK:   [[V_OR_B32_e32_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_24]], [[V_OR_B32_e32_66]], implicit $exec
383  ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e32_67]], implicit $exec
384  ; CHECK:   undef %691.sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
385  ; CHECK:   IMAGE_STORE_V4_V2_gfx10 %691, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "ImageResource")
386  ; CHECK:   S_ENDPGM 0
387.expVert:
388  %0 = extractelement <31 x i32> %userData, i64 2
389  %1 = extractelement <31 x i32> %userData, i64 3
390  %2 = extractelement <31 x i32> %userData, i64 4
391  %3 = extractelement <31 x i32> %userData, i64 7
392  %4 = extractelement <31 x i32> %userData, i64 8
393  %5 = extractelement <31 x i32> %userData, i64 9
394  %6 = extractelement <31 x i32> %userData, i64 17
395  %7 = extractelement <31 x i32> %userData, i64 18
396  %8 = extractelement <31 x i32> %userData, i64 19
397  %9 = extractelement <31 x i32> %userData, i64 20
398  %10 = extractelement <31 x i32> %userData, i64 21
399  %11 = extractelement <31 x i32> %userData, i64 22
400  %12 = extractelement <31 x i32> %userData, i64 24
401  %13 = extractelement <31 x i32> %userData, i64 26
402  %14 = extractelement <31 x i32> %userData, i64 30
403  %15 = insertelement <2 x i32> undef, i32 %13, i32 0
404  %16 = bitcast <2 x i32> %15 to i64
405  %17 = inttoptr i64 %16 to i8 addrspace(4)*
406  %18 = insertelement <2 x i32> undef, i32 %12, i32 0
407  %19 = bitcast <2 x i32> %18 to i64
408  %20 = inttoptr i64 %19 to i8 addrspace(4)*
409  %21 = insertelement <2 x i32> undef, i32 %11, i32 0
410  %22 = bitcast <2 x i32> %21 to i64
411  %23 = insertelement <2 x i32> undef, i32 %10, i32 0
412  %24 = bitcast <2 x i32> %23 to i64
413  %25 = insertelement <2 x i32> undef, i32 %9, i32 0
414  %26 = bitcast <2 x i32> %25 to i64
415  %27 = inttoptr i64 %26 to i8 addrspace(4)*
416  %28 = insertelement <2 x i32> undef, i32 %8, i32 0
417  %29 = bitcast <2 x i32> %28 to i64
418  %30 = insertelement <2 x i32> undef, i32 %7, i32 0
419  %31 = bitcast <2 x i32> %30 to i64
420  %32 = inttoptr i64 %31 to i8 addrspace(4)*
421  %33 = insertelement <2 x i32> undef, i32 %6, i32 0
422  %34 = bitcast <2 x i32> %33 to i64
423  %35 = inttoptr i64 %34 to i8 addrspace(4)*
424  %36 = insertelement <2 x i32> undef, i32 %14, i32 0
425  %37 = bitcast <2 x i32> %36 to i64
426  %38 = inttoptr i64 %37 to i8 addrspace(4)*
427  %39 = getelementptr i8, i8 addrspace(4)* %38, i64 232
428  %.i0.i = bitcast i8 addrspace(4)* %39 to i32 addrspace(4)*
429  %rootDesc58.ii0.i = load i32, i32 addrspace(4)* %.i0.i, align 8
430  %.i184.i = getelementptr i8, i8 addrspace(4)* %38, i64 236
431  %40 = bitcast i8 addrspace(4)* %.i184.i to i32 addrspace(4)*
432  %rootDesc58.ii1.i = load i32, i32 addrspace(4)* %40, align 4
433  %41 = and i32 %rootDesc58.ii1.i, 65535
434  %42 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %rootDesc58.ii0.i, i32 0
435  %43 = insertelement <4 x i32> %42, i32 %41, i32 1
436  %44 = and i32 undef, 65535
437  %45 = insertelement <4 x i32> undef, i32 %44, i32 1
438  %46 = load <4 x i32>, <4 x i32> addrspace(4)* undef, align 16
439  %47 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %46, i32 0, i32 0, i32 0, i32 0)
440  %48 = add i32 %47, -1
441  %49 = shl i32 %0, 4
442  %50 = call i32 @llvm.amdgcn.readfirstlane(i32 %49)
443  %51 = sext i32 %50 to i64
444  %52 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
445  %53 = add i32 %52, -2
446  %54 = or i32 %53, %48
447  %55 = shl i32 %1, 4
448  %56 = call i32 @llvm.amdgcn.readfirstlane(i32 %55)
449  %57 = sext i32 %56 to i64
450  %58 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
451  %59 = add i32 %58, -3
452  %60 = or i32 %54, %59
453  %61 = shl i32 %2, 4
454  %62 = call i32 @llvm.amdgcn.readfirstlane(i32 %61)
455  %63 = sext i32 %62 to i64
456  %64 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
457  %65 = add i32 %64, -4
458  %66 = or i32 %60, %65
459  %67 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
460  %68 = add i32 %67, -27
461  %69 = or i32 %66, %68
462  %70 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 0, i32 0, i32 0)
463  %71 = add i32 %70, -28
464  %72 = or i32 %69, %71
465  %73 = call i32 @llvm.amdgcn.readfirstlane(i32 %0)
466  %74 = getelementptr i8, i8 addrspace(4)* %35, i64 16
467  %75 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
468  %76 = add i32 %75, -29
469  %77 = or i32 %72, %76
470  %78 = call i32 @llvm.amdgcn.readfirstlane(i32 %1)
471  %79 = shl i32 %78, 4
472  %80 = sext i32 %79 to i64
473  %81 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
474  %82 = add i32 %81, -30
475  %83 = or i32 %77, %82
476  %84 = call i32 @llvm.amdgcn.readfirstlane(i32 %2)
477  %85 = shl i32 %84, 4
478  %86 = sext i32 %85 to i64
479  %87 = getelementptr i8, i8 addrspace(4)* %74, i64 %86
480  %88 = bitcast i8 addrspace(4)* %87 to <4 x i32> addrspace(4)*
481  %89 = load <4 x i32>, <4 x i32> addrspace(4)* %88, align 16
482  %90 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %89, i32 0, i32 0)
483  %91 = add i32 %90, -31
484  %92 = or i32 %83, %91
485  %93 = getelementptr i8, i8 addrspace(4)* %35, i64 64
486  %94 = getelementptr i8, i8 addrspace(4)* %93, i64 %51
487  %95 = bitcast i8 addrspace(4)* %94 to <4 x i32> addrspace(4)*
488  %96 = load <4 x i32>, <4 x i32> addrspace(4)* %95, align 16
489  %97 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %96, i32 0, i32 0, i32 0, i32 0)
490  %98 = add i32 %97, -32
491  %99 = or i32 %92, %98
492  %100 = getelementptr i8, i8 addrspace(4)* %93, i64 %57
493  %101 = bitcast i8 addrspace(4)* %100 to <4 x i32> addrspace(4)*
494  %102 = load <4 x i32>, <4 x i32> addrspace(4)* %101, align 16
495  %103 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %102, i32 0, i32 0, i32 0, i32 0)
496  %104 = add i32 %103, -33
497  %105 = or i32 %99, %104
498  %106 = getelementptr i8, i8 addrspace(4)* %93, i64 %63
499  %107 = bitcast i8 addrspace(4)* %106 to <4 x i32> addrspace(4)*
500  %108 = load <4 x i32>, <4 x i32> addrspace(4)* %107, align 16
501  %109 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %108, i32 0, i32 0, i32 0, i32 0)
502  %110 = add i32 %109, -34
503  %111 = or i32 %105, %110
504  %112 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
505  %113 = sext i32 %112 to i64
506  %114 = getelementptr i8, i8 addrspace(4)* %93, i64 %113
507  %115 = bitcast i8 addrspace(4)* %114 to <4 x i32> addrspace(4)*
508  %116 = load <4 x i32>, <4 x i32> addrspace(4)* %115, align 16
509  %117 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %116, i32 0, i32 0, i32 0, i32 0)
510  %118 = add i32 %117, -36
511  %119 = or i32 %111, %118
512  %120 = getelementptr i8, i8 addrspace(4)* %32, i64 %51
513  %121 = bitcast i8 addrspace(4)* %120 to <4 x i32> addrspace(4)*
514  %122 = load <4 x i32>, <4 x i32> addrspace(4)* %121, align 16
515  %123 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %122, i32 0, i32 0, i32 0, i32 0)
516  %124 = add i32 %123, -37
517  %125 = or i32 %119, %124
518  %126 = getelementptr i8, i8 addrspace(4)* %32, i64 %57
519  %127 = bitcast i8 addrspace(4)* %126 to <4 x i32> addrspace(4)*
520  %128 = load <4 x i32>, <4 x i32> addrspace(4)* %127, align 16
521  %129 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %128, i32 0, i32 0, i32 0, i32 0)
522  %130 = add i32 %129, -38
523  %131 = or i32 %125, %130
524  %132 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
525  %133 = add i32 %132, -39
526  %134 = or i32 %131, %133
527  %135 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
528  %136 = sext i32 %135 to i64
529  %137 = getelementptr i8, i8 addrspace(4)* %32, i64 %136
530  %138 = bitcast i8 addrspace(4)* %137 to <4 x i32> addrspace(4)*
531  %139 = load <4 x i32>, <4 x i32> addrspace(4)* %138, align 16
532  %140 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %139, i32 0, i32 0, i32 0, i32 0)
533  %141 = add i32 %140, -50
534  %142 = or i32 %134, %141
535  %143 = getelementptr i8, i8 addrspace(4)* %32, i64 224
536  %144 = getelementptr i8, i8 addrspace(4)* %143, i64 %51
537  %145 = bitcast i8 addrspace(4)* %144 to <4 x i32> addrspace(4)*
538  %146 = load <4 x i32>, <4 x i32> addrspace(4)* %145, align 16
539  %147 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %146, i32 0, i32 0, i32 0, i32 0)
540  %148 = add i32 %147, -51
541  %149 = or i32 %142, %148
542  %150 = getelementptr i8, i8 addrspace(4)* %143, i64 %57
543  %151 = bitcast i8 addrspace(4)* %150 to <4 x i32> addrspace(4)*
544  %152 = load <4 x i32>, <4 x i32> addrspace(4)* %151, align 16
545  %153 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %152, i32 0, i32 0, i32 0, i32 0)
546  %154 = add i32 %153, -52
547  %155 = or i32 %149, %154
548  %156 = getelementptr i8, i8 addrspace(4)* %143, i64 %63
549  %157 = bitcast i8 addrspace(4)* %156 to <4 x i32> addrspace(4)*
550  %158 = load <4 x i32>, <4 x i32> addrspace(4)* %157, align 16
551  %159 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %158, i32 0, i32 0, i32 0, i32 0)
552  %160 = add i32 %159, -53
553  %161 = or i32 %155, %160
554  %162 = sext i32 undef to i64
555  %163 = getelementptr i8, i8 addrspace(4)* %143, i64 %162
556  %164 = bitcast i8 addrspace(4)* %163 to <4 x i32> addrspace(4)*
557  %165 = load <4 x i32>, <4 x i32> addrspace(4)* %164, align 16
558  %166 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %165, i32 0, i32 0, i32 0, i32 0)
559  %167 = add i32 %166, -72
560  %168 = or i32 %161, %167
561  %169 = getelementptr i8, i8 addrspace(4)* %32, i64 576
562  %170 = getelementptr i8, i8 addrspace(4)* %169, i64 %51
563  %171 = bitcast i8 addrspace(4)* %170 to <4 x i32> addrspace(4)*
564  %172 = load <4 x i32>, <4 x i32> addrspace(4)* %171, align 16
565  %173 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %172, i32 0, i32 0, i32 0, i32 0)
566  %174 = add i32 %173, -73
567  %175 = or i32 %168, %174
568  %176 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
569  %177 = add i32 %176, -74
570  %178 = or i32 %175, %177
571  %179 = getelementptr i8, i8 addrspace(4)* %169, i64 %63
572  %180 = bitcast i8 addrspace(4)* %179 to <4 x i32> addrspace(4)*
573  %181 = load <4 x i32>, <4 x i32> addrspace(4)* %180, align 16
574  %182 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %181, i32 0, i32 0, i32 0, i32 0)
575  %183 = add i32 %182, -75
576  %184 = or i32 %178, %183
577  %185 = getelementptr i8, i8 addrspace(4)* %169, i64 %113
578  %186 = bitcast i8 addrspace(4)* %185 to <4 x i32> addrspace(4)*
579  %187 = load <4 x i32>, <4 x i32> addrspace(4)* %186, align 16
580  %188 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %187, i32 0, i32 0, i32 0, i32 0)
581  %189 = add i32 %188, -77
582  %190 = or i32 %184, %189
583  %191 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
584  %192 = add i32 %191, -93
585  %193 = or i32 %190, %192
586  %194 = inttoptr i64 %29 to i8 addrspace(4)*
587  %195 = getelementptr i8, i8 addrspace(4)* %194, i64 %51
588  %196 = bitcast i8 addrspace(4)* %195 to <4 x i32> addrspace(4)*
589  %197 = load <4 x i32>, <4 x i32> addrspace(4)* %196, align 16
590  %198 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %197, i32 0, i32 0, i32 0, i32 0)
591  %199 = add i32 %198, -94
592  %200 = or i32 %193, %199
593  %201 = load <4 x i32>, <4 x i32> addrspace(4)* undef, align 16
594  %202 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %201, i32 0, i32 0, i32 0)
595  %203 = add i32 %202, -95
596  %204 = or i32 %200, %203
597  %205 = getelementptr i8, i8 addrspace(4)* %27, i64 %80
598  %206 = bitcast i8 addrspace(4)* %205 to <4 x i32> addrspace(4)*
599  %207 = load <4 x i32>, <4 x i32> addrspace(4)* %206, align 16
600  %208 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %207, i32 0, i32 0, i32 0)
601  %209 = add i32 %208, -96
602  %210 = or i32 %204, %209
603  %211 = getelementptr i8, i8 addrspace(4)* %27, i64 %86
604  %212 = bitcast i8 addrspace(4)* %211 to <4 x i32> addrspace(4)*
605  %213 = load <4 x i32>, <4 x i32> addrspace(4)* %212, align 16
606  %214 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %213, i32 0, i32 0, i32 0)
607  %215 = add i32 %214, -97
608  %216 = or i32 %210, %215
609  %217 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, <{ [4 x i32], [6 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 1, i32 %0, i32 0
610  %218 = ptrtoint i32 addrspace(6)* %217 to i32
611  %219 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 %218, i32 0)
612  %220 = add i32 %219, -98
613  %221 = or i32 %216, %220
614  %222 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 undef, i32 0)
615  %223 = add i32 %222, -114
616  %224 = or i32 %221, %223
617  %225 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, <{ [4 x i32], [6 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 1, i32 %2, i32 0
618  %226 = ptrtoint i32 addrspace(6)* %225 to i32
619  %227 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 %226, i32 0)
620  %228 = add i32 %227, -130
621  %229 = or i32 %224, %228
622  %230 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, <{ [4 x i32], [6 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 1, i32 undef, i32 0
623  %231 = ptrtoint i32 addrspace(6)* %230 to i32
624  %232 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 %231, i32 0)
625  %233 = add i32 %232, -178
626  %234 = or i32 %229, %233
627  %235 = inttoptr i64 %24 to i8 addrspace(4)*
628  %236 = getelementptr i8, i8 addrspace(4)* %235, i64 %51
629  %237 = bitcast i8 addrspace(4)* %236 to <4 x i32> addrspace(4)*
630  %238 = load <4 x i32>, <4 x i32> addrspace(4)* %237, align 16
631  %239 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %238, i32 0, i32 0, i32 0, i32 0)
632  %240 = add i32 %239, -194
633  %241 = or i32 %234, %240
634  %242 = inttoptr i64 %22 to i8 addrspace(4)*
635  %243 = getelementptr i8, i8 addrspace(4)* %242, i64 %51
636  %244 = bitcast i8 addrspace(4)* %243 to <4 x i32> addrspace(4)*
637  %245 = load <4 x i32>, <4 x i32> addrspace(4)* %244, align 16
638  %246 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %245, i32 0, i32 0, i32 0, i32 0)
639  %247 = add i32 %246, -195
640  %248 = or i32 %241, %247
641  %249 = getelementptr i8, i8 addrspace(4)* %242, i64 %57
642  %250 = bitcast i8 addrspace(4)* %249 to <4 x i32> addrspace(4)*
643  %251 = load <4 x i32>, <4 x i32> addrspace(4)* %250, align 16
644  %252 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %251, i32 0, i32 0, i32 0, i32 0)
645  %253 = add i32 %252, -196
646  %254 = or i32 %248, %253
647  %255 = getelementptr i8, i8 addrspace(4)* %242, i64 %63
648  %256 = bitcast i8 addrspace(4)* %255 to <4 x i32> addrspace(4)*
649  %257 = load <4 x i32>, <4 x i32> addrspace(4)* %256, align 16
650  %258 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %257, i32 0, i32 0, i32 0, i32 0)
651  %259 = add i32 %258, -197
652  %260 = or i32 %254, %259
653  %261 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
654  %262 = add i32 %261, -216
655  %263 = or i32 %260, %262
656  %264 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, <{ [4 x i32], [6 x %llpc.array.element.2] }> addrspace(6)* null, i32 0, i32 1, i32 %0, i32 0
657  %265 = ptrtoint i32 addrspace(6)* %264 to i32
658  %266 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %265, i32 0)
659  %267 = add i32 %266, -217
660  %268 = or i32 %263, %267
661  %269 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
662  %270 = add i32 %269, -233
663  %271 = or i32 %268, %270
664  %272 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, <{ [4 x i32], [6 x %llpc.array.element.2] }> addrspace(6)* null, i32 0, i32 1, i32 %2, i32 0
665  %273 = ptrtoint i32 addrspace(6)* %272 to i32
666  %274 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %273, i32 0)
667  %275 = add i32 %274, -249
668  %276 = or i32 %271, %275
669  %277 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, <{ [4 x i32], [6 x %llpc.array.element.2] }> addrspace(6)* null, i32 0, i32 1, i32 undef, i32 0
670  %278 = ptrtoint i32 addrspace(6)* %277 to i32
671  %279 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %278, i32 0)
672  %280 = add i32 %279, -297
673  %281 = or i32 %276, %280
674  %282 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
675  %283 = add i32 %282, -313
676  %284 = or i32 %281, %283
677  %285 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
678  %286 = add i32 %285, -329
679  %287 = or i32 %284, %286
680  %288 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
681  %289 = add i32 %288, -345
682  %290 = or i32 %287, %289
683  %291 = getelementptr <{ [4 x i32], [9 x %llpc.array.element.5] }>, <{ [4 x i32], [9 x %llpc.array.element.5] }> addrspace(6)* null, i32 0, i32 1, i32 %4, i32 0
684  %292 = ptrtoint i32 addrspace(6)* %291 to i32
685  %293 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %292, i32 0)
686  %294 = add i32 %293, -441
687  %295 = or i32 %290, %294
688  %296 = getelementptr i8, i8 addrspace(4)* %20, i64 160
689  %297 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
690  %298 = add i32 %297, -457
691  %299 = or i32 %295, %298
692  %300 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
693  %301 = add i32 %300, -458
694  %302 = or i32 %299, %301
695  %303 = getelementptr i8, i8 addrspace(4)* %296, i64 %63
696  %304 = bitcast i8 addrspace(4)* %303 to <4 x i32> addrspace(4)*
697  %305 = load <4 x i32>, <4 x i32> addrspace(4)* %304, align 16
698  %306 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %305, i32 0, i32 0, i32 0, i32 0)
699  %307 = add i32 %306, -459
700  %308 = or i32 %302, %307
701  %309 = shl i32 %5, 4
702  %310 = call i32 @llvm.amdgcn.readfirstlane(i32 %309)
703  %311 = sext i32 %310 to i64
704  %312 = getelementptr i8, i8 addrspace(4)* %296, i64 %311
705  %313 = bitcast i8 addrspace(4)* %312 to <4 x i32> addrspace(4)*
706  %314 = load <4 x i32>, <4 x i32> addrspace(4)* %313, align 16
707  %315 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %314, i32 0, i32 0, i32 0, i32 0)
708  %316 = add i32 %315, -466
709  %317 = or i32 %308, %316
710  %318 = getelementptr i8, i8 addrspace(4)* %38, i64 168
711  %319 = shl i32 %73, 3
712  %320 = sext i32 %319 to i64
713  %321 = getelementptr i8, i8 addrspace(4)* %318, i64 %320
714  %.i085.i = bitcast i8 addrspace(4)* %321 to i32 addrspace(4)*
715  %.ii0.i = load i32, i32 addrspace(4)* %.i085.i, align 8
716  %322 = and i32 undef, 65535
717  %323 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii0.i, i32 0
718  %324 = insertelement <4 x i32> %323, i32 %322, i32 1
719  %325 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %324, i32 0, i32 0)
720  %326 = add i32 %325, -467
721  %327 = or i32 %317, %326
722  %328 = shl i32 %78, 3
723  %329 = sext i32 %328 to i64
724  %330 = getelementptr i8, i8 addrspace(4)* %318, i64 %329
725  %.i088.i = bitcast i8 addrspace(4)* %330 to i32 addrspace(4)*
726  %.ii090.i = load i32, i32 addrspace(4)* %.i088.i, align 8
727  %.i191.i = getelementptr i8, i8 addrspace(4)* %330, i64 4
728  %331 = bitcast i8 addrspace(4)* %.i191.i to i32 addrspace(4)*
729  %.ii192.i = load i32, i32 addrspace(4)* %331, align 4
730  %332 = and i32 %.ii192.i, 65535
731  %333 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii090.i, i32 0
732  %334 = insertelement <4 x i32> %333, i32 %332, i32 1
733  %335 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %334, i32 0, i32 0)
734  %336 = add i32 %335, -468
735  %337 = or i32 %327, %336
736  %338 = shl i32 %84, 3
737  %339 = sext i32 %338 to i64
738  %340 = getelementptr i8, i8 addrspace(4)* %318, i64 %339
739  %.i094.i = bitcast i8 addrspace(4)* %340 to i32 addrspace(4)*
740  %.ii096.i = load i32, i32 addrspace(4)* %.i094.i, align 8
741  %.i197.i = getelementptr i8, i8 addrspace(4)* %340, i64 4
742  %341 = bitcast i8 addrspace(4)* %.i197.i to i32 addrspace(4)*
743  %.ii198.i = load i32, i32 addrspace(4)* %341, align 4
744  %342 = and i32 %.ii198.i, 65535
745  %343 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii096.i, i32 0
746  %344 = insertelement <4 x i32> %343, i32 %342, i32 1
747  %345 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %344, i32 0, i32 0)
748  %346 = add i32 %345, -469
749  %347 = or i32 %337, %346
750  %348 = call i32 @llvm.amdgcn.readfirstlane(i32 %3)
751  %349 = shl i32 %348, 3
752  %350 = sext i32 %349 to i64
753  %351 = getelementptr i8, i8 addrspace(4)* %318, i64 %350
754  %.i0100.i = bitcast i8 addrspace(4)* %351 to i32 addrspace(4)*
755  %.ii0102.i = load i32, i32 addrspace(4)* %.i0100.i, align 8
756  %.ii1104.i = load i32, i32 addrspace(4)* undef, align 4
757  %352 = and i32 %.ii1104.i, 65535
758  %353 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii0102.i, i32 0
759  %354 = insertelement <4 x i32> %353, i32 %352, i32 1
760  %355 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %354, i32 0, i32 0)
761  %356 = add i32 %355, -473
762  %357 = or i32 %347, %356
763  %358 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
764  %359 = add i32 %358, -474
765  %360 = or i32 %357, %359
766  %361 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
767  %362 = add i32 %361, -475
768  %363 = or i32 %360, %362
769  %364 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
770  %365 = add i32 %364, -491
771  %366 = or i32 %363, %365
772  %367 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
773  %368 = add i32 %367, -507
774  %369 = or i32 %366, %368
775  %370 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
776  %371 = add i32 %370, -539
777  %372 = or i32 %369, %371
778  %373 = getelementptr i8, i8 addrspace(4)* %17, i64 96
779  %374 = getelementptr i8, i8 addrspace(4)* %373, i64 %51
780  %375 = bitcast i8 addrspace(4)* %374 to <4 x i32> addrspace(4)*
781  %376 = load <4 x i32>, <4 x i32> addrspace(4)* %375, align 16
782  %377 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %376, i32 0, i32 0, i32 0, i32 0)
783  %378 = add i32 %377, -555
784  %379 = or i32 %372, %378
785  %380 = getelementptr i8, i8 addrspace(4)* %373, i64 %57
786  %381 = bitcast i8 addrspace(4)* %380 to <4 x i32> addrspace(4)*
787  %382 = load <4 x i32>, <4 x i32> addrspace(4)* %381, align 16
788  %383 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %382, i32 0, i32 0, i32 0, i32 0)
789  %384 = add i32 %383, -556
790  %385 = or i32 %379, %384
791  %386 = getelementptr i8, i8 addrspace(4)* %373, i64 %63
792  %387 = bitcast i8 addrspace(4)* %386 to <4 x i32> addrspace(4)*
793  %388 = load <4 x i32>, <4 x i32> addrspace(4)* %387, align 16
794  %389 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %388, i32 0, i32 0, i32 0, i32 0)
795  %390 = add i32 %389, -557
796  %391 = or i32 %385, %390
797  %392 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
798  %393 = add i32 %392, -574
799  %394 = or i32 %391, %393
800  %395 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
801  %396 = add i32 %395, -575
802  %397 = or i32 %394, %396
803  %398 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
804  %399 = add i32 %398, -576
805  %400 = or i32 %397, %399
806  %401 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
807  %402 = add i32 %401, -577
808  %403 = or i32 %400, %402
809  %404 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
810  %405 = add i32 %404, -593
811  %406 = or i32 %403, %405
812  %407 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %43, i32 0, i32 0)
813  %408 = add i32 %407, -594
814  %409 = or i32 %406, %408
815  %.not.i = icmp eq i32 %409, 0
816  %410 = load <8 x i32>, <8 x i32> addrspace(4)* undef, align 32
817  %.i010.i = select i1 %.not.i, float 0x36A0000000000000, float 0.000000e+00
818  %411 = insertelement <4 x float> undef, float %.i010.i, i32 3
819  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %411, i32 15, i32 undef, i32 undef, <8 x i32> %410, i32 0, i32 0)
820  ret void
821}
822
823declare i32 @llvm.amdgcn.readfirstlane(i32)
824declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
825declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
826declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32 immarg)
827declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
828declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
829declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)
830