1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s
3; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s
4
5define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
6  ; HSA-VI-LABEL: name: i8_arg
7  ; HSA-VI: bb.1 (%ir-block.0):
8  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
9  ; HSA-VI-NEXT: {{  $}}
10  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
11  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
12  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
13  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
14  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
15  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
16  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
17  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
18  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
19  ; HSA-VI-NEXT:   S_ENDPGM 0
20  ; LEGACY-MESA-VI-LABEL: name: i8_arg
21  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
22  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
23  ; LEGACY-MESA-VI-NEXT: {{  $}}
24  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
25  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
26  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
27  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
28  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
29  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
30  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
31  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
32  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
33  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
34  %ext = zext i8 %in to i32
35  store i32 %ext, i32 addrspace(1)* %out, align 4
36  ret void
37}
38
39define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
40  ; HSA-VI-LABEL: name: i8_zext_arg
41  ; HSA-VI: bb.1 (%ir-block.0):
42  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
43  ; HSA-VI-NEXT: {{  $}}
44  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
45  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
46  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
47  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
48  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
49  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
50  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
51  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
52  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
53  ; HSA-VI-NEXT:   S_ENDPGM 0
54  ; LEGACY-MESA-VI-LABEL: name: i8_zext_arg
55  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
56  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
57  ; LEGACY-MESA-VI-NEXT: {{  $}}
58  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
59  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
60  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
61  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
62  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
63  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
64  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
65  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
66  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
67  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
68  %ext = zext i8 %in to i32
69  store i32 %ext, i32 addrspace(1)* %out, align 4
70  ret void
71}
72
73define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
74  ; HSA-VI-LABEL: name: i8_sext_arg
75  ; HSA-VI: bb.1 (%ir-block.0):
76  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
77  ; HSA-VI-NEXT: {{  $}}
78  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
79  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
80  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
81  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
82  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
83  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
84  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
85  ; HSA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
86  ; HSA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
87  ; HSA-VI-NEXT:   S_ENDPGM 0
88  ; LEGACY-MESA-VI-LABEL: name: i8_sext_arg
89  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
90  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
91  ; LEGACY-MESA-VI-NEXT: {{  $}}
92  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
93  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
94  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
95  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
96  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
97  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
98  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
99  ; LEGACY-MESA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
100  ; LEGACY-MESA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
101  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
102  %ext = sext i8 %in to i32
103  store i32 %ext, i32 addrspace(1)* %out, align 4
104  ret void
105}
106
107define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
108  ; HSA-VI-LABEL: name: i16_arg
109  ; HSA-VI: bb.1 (%ir-block.0):
110  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
111  ; HSA-VI-NEXT: {{  $}}
112  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
113  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
114  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
115  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
116  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
117  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
118  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
119  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
120  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
121  ; HSA-VI-NEXT:   S_ENDPGM 0
122  ; LEGACY-MESA-VI-LABEL: name: i16_arg
123  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
124  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
125  ; LEGACY-MESA-VI-NEXT: {{  $}}
126  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
127  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
128  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
129  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
130  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
131  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
132  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
133  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
134  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
135  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
136  %ext = zext i16 %in to i32
137  store i32 %ext, i32 addrspace(1)* %out, align 4
138  ret void
139}
140
141define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
142  ; HSA-VI-LABEL: name: i16_zext_arg
143  ; HSA-VI: bb.1 (%ir-block.0):
144  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
145  ; HSA-VI-NEXT: {{  $}}
146  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
147  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
148  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
149  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
150  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
151  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
152  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
153  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
154  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
155  ; HSA-VI-NEXT:   S_ENDPGM 0
156  ; LEGACY-MESA-VI-LABEL: name: i16_zext_arg
157  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
158  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
159  ; LEGACY-MESA-VI-NEXT: {{  $}}
160  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
161  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
162  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
163  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
164  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
165  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
166  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
167  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
168  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
169  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
170  %ext = zext i16 %in to i32
171  store i32 %ext, i32 addrspace(1)* %out, align 4
172  ret void
173}
174
175define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
176  ; HSA-VI-LABEL: name: i16_sext_arg
177  ; HSA-VI: bb.1 (%ir-block.0):
178  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
179  ; HSA-VI-NEXT: {{  $}}
180  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
181  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
182  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
183  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
184  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
185  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
186  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
187  ; HSA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
188  ; HSA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
189  ; HSA-VI-NEXT:   S_ENDPGM 0
190  ; LEGACY-MESA-VI-LABEL: name: i16_sext_arg
191  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
192  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
193  ; LEGACY-MESA-VI-NEXT: {{  $}}
194  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
195  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
196  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
197  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
198  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
199  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
200  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
201  ; LEGACY-MESA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
202  ; LEGACY-MESA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
203  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
204  %ext = sext i16 %in to i32
205  store i32 %ext, i32 addrspace(1)* %out, align 4
206  ret void
207}
208
209define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
210  ; HSA-VI-LABEL: name: i32_arg
211  ; HSA-VI: bb.1.entry:
212  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
213  ; HSA-VI-NEXT: {{  $}}
214  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
215  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
216  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
217  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
218  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
219  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
220  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
221  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
222  ; HSA-VI-NEXT:   S_ENDPGM 0
223  ; LEGACY-MESA-VI-LABEL: name: i32_arg
224  ; LEGACY-MESA-VI: bb.1.entry:
225  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
226  ; LEGACY-MESA-VI-NEXT: {{  $}}
227  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
228  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
229  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
230  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
231  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
232  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
233  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
234  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
235  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
236entry:
237  store i32 %in, i32 addrspace(1)* %out, align 4
238  ret void
239}
240
241define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
242  ; HSA-VI-LABEL: name: f32_arg
243  ; HSA-VI: bb.1.entry:
244  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
245  ; HSA-VI-NEXT: {{  $}}
246  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
247  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
248  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
249  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
250  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
251  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
252  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
253  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
254  ; HSA-VI-NEXT:   S_ENDPGM 0
255  ; LEGACY-MESA-VI-LABEL: name: f32_arg
256  ; LEGACY-MESA-VI: bb.1.entry:
257  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
258  ; LEGACY-MESA-VI-NEXT: {{  $}}
259  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
260  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
261  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
262  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
263  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
264  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
265  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
266  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
267  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
268entry:
269  store float %in, float addrspace(1)* %out, align 4
270  ret void
271}
272
273define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
274  ; HSA-VI-LABEL: name: v2i8_arg
275  ; HSA-VI: bb.1.entry:
276  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
277  ; HSA-VI-NEXT: {{  $}}
278  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
279  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
280  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
281  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
282  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
283  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
284  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4)
285  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
286  ; HSA-VI-NEXT:   S_ENDPGM 0
287  ; LEGACY-MESA-VI-LABEL: name: v2i8_arg
288  ; LEGACY-MESA-VI: bb.1.entry:
289  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
290  ; LEGACY-MESA-VI-NEXT: {{  $}}
291  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
292  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
293  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
294  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
295  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
296  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
297  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4)
298  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
299  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
300entry:
301  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
302  ret void
303}
304
305define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
306  ; HSA-VI-LABEL: name: v2i16_arg
307  ; HSA-VI: bb.1.entry:
308  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
309  ; HSA-VI-NEXT: {{  $}}
310  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
311  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
312  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
313  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
314  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
315  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
316  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4)
317  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
318  ; HSA-VI-NEXT:   S_ENDPGM 0
319  ; LEGACY-MESA-VI-LABEL: name: v2i16_arg
320  ; LEGACY-MESA-VI: bb.1.entry:
321  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
322  ; LEGACY-MESA-VI-NEXT: {{  $}}
323  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
324  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
325  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
326  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
327  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
328  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
329  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4)
330  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
331  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
332entry:
333  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
334  ret void
335}
336
337define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
338  ; HSA-VI-LABEL: name: v2i32_arg
339  ; HSA-VI: bb.1.entry:
340  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
341  ; HSA-VI-NEXT: {{  $}}
342  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
343  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
344  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
345  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
346  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
347  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
348  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
349  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
350  ; HSA-VI-NEXT:   S_ENDPGM 0
351  ; LEGACY-MESA-VI-LABEL: name: v2i32_arg
352  ; LEGACY-MESA-VI: bb.1.entry:
353  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
354  ; LEGACY-MESA-VI-NEXT: {{  $}}
355  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
356  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
357  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
358  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
359  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
360  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
361  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
362  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
363  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
364entry:
365  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
366  ret void
367}
368
369define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
370  ; HSA-VI-LABEL: name: v2f32_arg
371  ; HSA-VI: bb.1.entry:
372  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
373  ; HSA-VI-NEXT: {{  $}}
374  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
375  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
376  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
377  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
378  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
379  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
380  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
381  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
382  ; HSA-VI-NEXT:   S_ENDPGM 0
383  ; LEGACY-MESA-VI-LABEL: name: v2f32_arg
384  ; LEGACY-MESA-VI: bb.1.entry:
385  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
386  ; LEGACY-MESA-VI-NEXT: {{  $}}
387  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
388  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
389  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
390  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
391  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
392  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
393  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
394  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
395  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
396entry:
397  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
398  ret void
399}
400
401define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
402  ; HSA-VI-LABEL: name: v3i8_arg
403  ; HSA-VI: bb.1.entry:
404  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
405  ; HSA-VI-NEXT: {{  $}}
406  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
407  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
408  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
409  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
410  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
411  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
412  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4)
413  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
414  ; HSA-VI-NEXT:   S_ENDPGM 0
415  ; LEGACY-MESA-VI-LABEL: name: v3i8_arg
416  ; LEGACY-MESA-VI: bb.1.entry:
417  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
418  ; LEGACY-MESA-VI-NEXT: {{  $}}
419  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
420  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
421  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
422  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
423  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
424  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
425  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4)
426  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
427  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
428entry:
429  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
430  ret void
431}
432
433define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
434  ; HSA-VI-LABEL: name: v3i16_arg
435  ; HSA-VI: bb.1.entry:
436  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
437  ; HSA-VI-NEXT: {{  $}}
438  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
439  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
440  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
441  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
442  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
443  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
444  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4)
445  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
446  ; HSA-VI-NEXT:   S_ENDPGM 0
447  ; LEGACY-MESA-VI-LABEL: name: v3i16_arg
448  ; LEGACY-MESA-VI: bb.1.entry:
449  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
450  ; LEGACY-MESA-VI-NEXT: {{  $}}
451  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
452  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
453  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
454  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
455  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
456  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
457  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4)
458  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
459  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
460entry:
461  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
462  ret void
463}
464
465define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
466  ; HSA-VI-LABEL: name: v3i32_arg
467  ; HSA-VI: bb.1.entry:
468  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
469  ; HSA-VI-NEXT: {{  $}}
470  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
471  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
472  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
473  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
474  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
475  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
476  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
477  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
478  ; HSA-VI-NEXT:   S_ENDPGM 0
479  ; LEGACY-MESA-VI-LABEL: name: v3i32_arg
480  ; LEGACY-MESA-VI: bb.1.entry:
481  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
482  ; LEGACY-MESA-VI-NEXT: {{  $}}
483  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
484  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
485  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
486  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
487  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
488  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
489  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
490  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
491  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
492entry:
493  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
494  ret void
495}
496
497define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
498  ; HSA-VI-LABEL: name: v3f32_arg
499  ; HSA-VI: bb.1.entry:
500  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
501  ; HSA-VI-NEXT: {{  $}}
502  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
503  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
504  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
505  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
506  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
507  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
508  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
509  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
510  ; HSA-VI-NEXT:   S_ENDPGM 0
511  ; LEGACY-MESA-VI-LABEL: name: v3f32_arg
512  ; LEGACY-MESA-VI: bb.1.entry:
513  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
514  ; LEGACY-MESA-VI-NEXT: {{  $}}
515  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
516  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
517  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
518  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
519  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
520  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
521  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
522  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
523  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
524entry:
525  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
526  ret void
527}
528
529define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
530  ; HSA-VI-LABEL: name: v4i8_arg
531  ; HSA-VI: bb.1.entry:
532  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
533  ; HSA-VI-NEXT: {{  $}}
534  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
535  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
536  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
537  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
538  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
539  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
540  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4)
541  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
542  ; HSA-VI-NEXT:   S_ENDPGM 0
543  ; LEGACY-MESA-VI-LABEL: name: v4i8_arg
544  ; LEGACY-MESA-VI: bb.1.entry:
545  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
546  ; LEGACY-MESA-VI-NEXT: {{  $}}
547  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
548  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
549  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
550  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
551  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
552  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
553  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4)
554  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
555  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
556entry:
557  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
558  ret void
559}
560
561define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
562  ; HSA-VI-LABEL: name: v4i16_arg
563  ; HSA-VI: bb.1.entry:
564  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
565  ; HSA-VI-NEXT: {{  $}}
566  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
567  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
568  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
569  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
570  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
571  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
572  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4)
573  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
574  ; HSA-VI-NEXT:   S_ENDPGM 0
575  ; LEGACY-MESA-VI-LABEL: name: v4i16_arg
576  ; LEGACY-MESA-VI: bb.1.entry:
577  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
578  ; LEGACY-MESA-VI-NEXT: {{  $}}
579  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
580  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
581  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
582  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
583  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
584  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
585  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), align 4, addrspace 4)
586  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
587  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
588entry:
589  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
590  ret void
591}
592
593define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
594  ; HSA-VI-LABEL: name: v4i32_arg
595  ; HSA-VI: bb.1.entry:
596  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
597  ; HSA-VI-NEXT: {{  $}}
598  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
599  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
600  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
601  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
602  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
603  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
604  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
605  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
606  ; HSA-VI-NEXT:   S_ENDPGM 0
607  ; LEGACY-MESA-VI-LABEL: name: v4i32_arg
608  ; LEGACY-MESA-VI: bb.1.entry:
609  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
610  ; LEGACY-MESA-VI-NEXT: {{  $}}
611  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
612  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
613  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
614  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
615  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
616  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
617  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
618  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
619  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
620entry:
621  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
622  ret void
623}
624
625define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
626  ; HSA-VI-LABEL: name: v4f32_arg
627  ; HSA-VI: bb.1.entry:
628  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
629  ; HSA-VI-NEXT: {{  $}}
630  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
631  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
632  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
633  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
634  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
635  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
636  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
637  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
638  ; HSA-VI-NEXT:   S_ENDPGM 0
639  ; LEGACY-MESA-VI-LABEL: name: v4f32_arg
640  ; LEGACY-MESA-VI: bb.1.entry:
641  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
642  ; LEGACY-MESA-VI-NEXT: {{  $}}
643  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
644  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
645  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
646  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
647  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
648  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
649  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
650  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
651  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
652entry:
653  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
654  ret void
655}
656
657define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
658  ; HSA-VI-LABEL: name: v8i8_arg
659  ; HSA-VI: bb.1.entry:
660  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
661  ; HSA-VI-NEXT: {{  $}}
662  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
663  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
664  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
665  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
666  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
667  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
668  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4)
669  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
670  ; HSA-VI-NEXT:   S_ENDPGM 0
671  ; LEGACY-MESA-VI-LABEL: name: v8i8_arg
672  ; LEGACY-MESA-VI: bb.1.entry:
673  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
674  ; LEGACY-MESA-VI-NEXT: {{  $}}
675  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
676  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
677  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
678  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
679  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
680  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
681  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4)
682  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
683  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
684entry:
685  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
686  ret void
687}
688
689define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
690  ; HSA-VI-LABEL: name: v8i16_arg
691  ; HSA-VI: bb.1.entry:
692  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
693  ; HSA-VI-NEXT: {{  $}}
694  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
695  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
696  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
697  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
698  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
699  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
700  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4)
701  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
702  ; HSA-VI-NEXT:   S_ENDPGM 0
703  ; LEGACY-MESA-VI-LABEL: name: v8i16_arg
704  ; LEGACY-MESA-VI: bb.1.entry:
705  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
706  ; LEGACY-MESA-VI-NEXT: {{  $}}
707  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
708  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
709  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
710  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
711  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
712  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
713  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4)
714  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
715  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
716entry:
717  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
718  ret void
719}
720
721define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
722  ; HSA-VI-LABEL: name: v8i32_arg
723  ; HSA-VI: bb.1.entry:
724  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
725  ; HSA-VI-NEXT: {{  $}}
726  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
727  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
728  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
729  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
730  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
731  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
732  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
733  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
734  ; HSA-VI-NEXT:   S_ENDPGM 0
735  ; LEGACY-MESA-VI-LABEL: name: v8i32_arg
736  ; LEGACY-MESA-VI: bb.1.entry:
737  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
738  ; LEGACY-MESA-VI-NEXT: {{  $}}
739  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
740  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
741  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
742  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
743  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
744  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
745  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
746  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
747  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
748entry:
749  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
750  ret void
751}
752
753define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
754  ; HSA-VI-LABEL: name: v8f32_arg
755  ; HSA-VI: bb.1.entry:
756  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
757  ; HSA-VI-NEXT: {{  $}}
758  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
759  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
760  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
761  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
762  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
763  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
764  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
765  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
766  ; HSA-VI-NEXT:   S_ENDPGM 0
767  ; LEGACY-MESA-VI-LABEL: name: v8f32_arg
768  ; LEGACY-MESA-VI: bb.1.entry:
769  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
770  ; LEGACY-MESA-VI-NEXT: {{  $}}
771  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
772  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
773  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
774  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
775  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
776  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
777  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
778  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
779  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
780entry:
781  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
782  ret void
783}
784
785define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
786  ; HSA-VI-LABEL: name: v16i8_arg
787  ; HSA-VI: bb.1.entry:
788  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
789  ; HSA-VI-NEXT: {{  $}}
790  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
791  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
792  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
793  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
794  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
795  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
796  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4)
797  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
798  ; HSA-VI-NEXT:   S_ENDPGM 0
799  ; LEGACY-MESA-VI-LABEL: name: v16i8_arg
800  ; LEGACY-MESA-VI: bb.1.entry:
801  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
802  ; LEGACY-MESA-VI-NEXT: {{  $}}
803  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
804  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
805  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
806  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
807  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
808  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
809  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4)
810  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
811  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
812entry:
813  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
814  ret void
815}
816
817define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
818  ; HSA-VI-LABEL: name: v16i16_arg
819  ; HSA-VI: bb.1.entry:
820  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
821  ; HSA-VI-NEXT: {{  $}}
822  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
823  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
824  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
825  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
826  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
827  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
828  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4)
829  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
830  ; HSA-VI-NEXT:   S_ENDPGM 0
831  ; LEGACY-MESA-VI-LABEL: name: v16i16_arg
832  ; LEGACY-MESA-VI: bb.1.entry:
833  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
834  ; LEGACY-MESA-VI-NEXT: {{  $}}
835  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
836  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
837  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
838  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
839  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
840  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
841  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4)
842  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
843  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
844entry:
845  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
846  ret void
847}
848
849define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
850  ; HSA-VI-LABEL: name: v16i32_arg
851  ; HSA-VI: bb.1.entry:
852  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
853  ; HSA-VI-NEXT: {{  $}}
854  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
855  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
856  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
857  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
858  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
859  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
860  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
861  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
862  ; HSA-VI-NEXT:   S_ENDPGM 0
863  ; LEGACY-MESA-VI-LABEL: name: v16i32_arg
864  ; LEGACY-MESA-VI: bb.1.entry:
865  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
866  ; LEGACY-MESA-VI-NEXT: {{  $}}
867  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
868  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
869  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
870  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
871  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
872  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
873  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
874  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
875  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
876entry:
877  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
878  ret void
879}
880
881define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
882  ; HSA-VI-LABEL: name: v16f32_arg
883  ; HSA-VI: bb.1.entry:
884  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
885  ; HSA-VI-NEXT: {{  $}}
886  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
887  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
888  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
889  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
890  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
891  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
892  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
893  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
894  ; HSA-VI-NEXT:   S_ENDPGM 0
895  ; LEGACY-MESA-VI-LABEL: name: v16f32_arg
896  ; LEGACY-MESA-VI: bb.1.entry:
897  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
898  ; LEGACY-MESA-VI-NEXT: {{  $}}
899  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
900  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
901  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
902  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
903  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
904  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
905  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
906  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
907  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
908entry:
909  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
910  ret void
911}
912
913define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
914  ; HSA-VI-LABEL: name: kernel_arg_i64
915  ; HSA-VI: bb.1 (%ir-block.0):
916  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
917  ; HSA-VI-NEXT: {{  $}}
918  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
919  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
920  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
921  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
922  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
923  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
924  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
925  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
926  ; HSA-VI-NEXT:   S_ENDPGM 0
927  ; LEGACY-MESA-VI-LABEL: name: kernel_arg_i64
928  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
929  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
930  ; LEGACY-MESA-VI-NEXT: {{  $}}
931  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
932  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
933  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
934  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
935  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
936  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
937  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
938  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
939  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
940  store i64 %a, i64 addrspace(1)* %out, align 8
941  ret void
942}
943
944define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double  %in) {
945  ; HSA-VI-LABEL: name: f64_kernel_arg
946  ; HSA-VI: bb.1.entry:
947  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
948  ; HSA-VI-NEXT: {{  $}}
949  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
950  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
951  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
952  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
953  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
954  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
955  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
956  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
957  ; HSA-VI-NEXT:   S_ENDPGM 0
958  ; LEGACY-MESA-VI-LABEL: name: f64_kernel_arg
959  ; LEGACY-MESA-VI: bb.1.entry:
960  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
961  ; LEGACY-MESA-VI-NEXT: {{  $}}
962  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
963  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
964  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
965  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
966  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
967  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
968  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
969  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
970  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
971entry:
972  store double %in, double addrspace(1)* %out
973  ret void
974}
975
976define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
977  ; HSA-VI-LABEL: name: i1_arg
978  ; HSA-VI: bb.1 (%ir-block.0):
979  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
980  ; HSA-VI-NEXT: {{  $}}
981  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
982  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
983  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
984  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
985  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
986  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
987  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
988  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
989  ; HSA-VI-NEXT:   S_ENDPGM 0
990  ; LEGACY-MESA-VI-LABEL: name: i1_arg
991  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
992  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
993  ; LEGACY-MESA-VI-NEXT: {{  $}}
994  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
995  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
996  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
997  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
998  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
999  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1000  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1001  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
1002  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1003  store i1 %x, i1 addrspace(1)* %out, align 1
1004  ret void
1005}
1006
1007define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
1008  ; HSA-VI-LABEL: name: i1_arg_zext_i32
1009  ; HSA-VI: bb.1 (%ir-block.0):
1010  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1011  ; HSA-VI-NEXT: {{  $}}
1012  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1013  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1014  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1015  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1016  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1017  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1018  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
1019  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
1020  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1021  ; HSA-VI-NEXT:   S_ENDPGM 0
1022  ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i32
1023  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1024  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1025  ; LEGACY-MESA-VI-NEXT: {{  $}}
1026  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1027  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1028  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1029  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1030  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1031  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1032  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1033  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
1034  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1035  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1036  %ext = zext i1 %x to i32
1037  store i32 %ext, i32 addrspace(1)* %out, align 4
1038  ret void
1039}
1040
1041define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
1042  ; HSA-VI-LABEL: name: i1_arg_zext_i64
1043  ; HSA-VI: bb.1 (%ir-block.0):
1044  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1045  ; HSA-VI-NEXT: {{  $}}
1046  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1047  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1048  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1049  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1050  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1051  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1052  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
1053  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
1054  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1055  ; HSA-VI-NEXT:   S_ENDPGM 0
1056  ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i64
1057  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1058  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1059  ; LEGACY-MESA-VI-NEXT: {{  $}}
1060  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1061  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1062  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1063  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1064  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1065  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1066  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1067  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
1068  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1069  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1070  %ext = zext i1 %x to i64
1071  store i64 %ext, i64 addrspace(1)* %out, align 8
1072  ret void
1073}
1074
1075define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
1076  ; HSA-VI-LABEL: name: i1_arg_sext_i32
1077  ; HSA-VI: bb.1 (%ir-block.0):
1078  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1079  ; HSA-VI-NEXT: {{  $}}
1080  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1081  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1082  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1083  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1084  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1085  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1086  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
1087  ; HSA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
1088  ; HSA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1089  ; HSA-VI-NEXT:   S_ENDPGM 0
1090  ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i32
1091  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1092  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1093  ; LEGACY-MESA-VI-NEXT: {{  $}}
1094  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1095  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1096  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1097  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1098  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1099  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1100  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1101  ; LEGACY-MESA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
1102  ; LEGACY-MESA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1103  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1104  %ext = sext i1 %x to i32
1105  store i32 %ext, i32addrspace(1)* %out, align 4
1106  ret void
1107}
1108
1109define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
1110  ; HSA-VI-LABEL: name: i1_arg_sext_i64
1111  ; HSA-VI: bb.1 (%ir-block.0):
1112  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1113  ; HSA-VI-NEXT: {{  $}}
1114  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1115  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1116  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1117  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1118  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1119  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1120  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
1121  ; HSA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
1122  ; HSA-VI-NEXT:   G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1123  ; HSA-VI-NEXT:   S_ENDPGM 0
1124  ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i64
1125  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1126  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1127  ; LEGACY-MESA-VI-NEXT: {{  $}}
1128  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1129  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1130  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1131  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1132  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1133  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1134  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1135  ; LEGACY-MESA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
1136  ; LEGACY-MESA-VI-NEXT:   G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1137  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1138  %ext = sext i1 %x to i64
1139  store i64 %ext, i64 addrspace(1)* %out, align 8
1140  ret void
1141}
1142
1143; 0-sized arguments do not add a slot to the argument register set, so
1144; waste an index in the virtual register array.
1145define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind {
1146  ; HSA-VI-LABEL: name: empty_struct_arg
1147  ; HSA-VI: bb.1 (%ir-block.0):
1148  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1149  ; HSA-VI-NEXT: {{  $}}
1150  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1151  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1152  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1153  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1154  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1155  ; HSA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1156  ; HSA-VI-NEXT:   S_ENDPGM 0
1157  ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg
1158  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1159  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1160  ; LEGACY-MESA-VI-NEXT: {{  $}}
1161  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1162  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1163  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1164  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1165  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1166  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1167  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1168  store i32 %arg1, i32 addrspace(1)* undef
1169  ret void
1170}
1171
1172define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind {
1173  ; HSA-VI-LABEL: name: empty_array_arg
1174  ; HSA-VI: bb.1 (%ir-block.0):
1175  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1176  ; HSA-VI-NEXT: {{  $}}
1177  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1178  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1179  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1180  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1181  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1182  ; HSA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1183  ; HSA-VI-NEXT:   S_ENDPGM 0
1184  ; LEGACY-MESA-VI-LABEL: name: empty_array_arg
1185  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1186  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1187  ; LEGACY-MESA-VI-NEXT: {{  $}}
1188  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1189  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1190  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1191  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1192  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1193  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1194  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1195  store i32 %arg1, i32 addrspace(1)* undef
1196  ret void
1197}
1198
1199; The correct load offsets for these:
1200; load 4 from 0,
1201; load 8 from 8
1202; load 4 from 24
1203; load 8 from 32
1204
1205; With the SelectionDAG argument lowering, the alignments for the
1206; struct members is not properly considered, making these wrong.
1207define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, {i32, i64} %arg1) {
1208  ; HSA-VI-LABEL: name: struct_argument_alignment
1209  ; HSA-VI: bb.1 (%ir-block.0):
1210  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1211  ; HSA-VI-NEXT: {{  $}}
1212  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1213  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1214  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1215  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1216  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1217  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1218  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1219  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1220  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1221  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
1222  ; HSA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
1223  ; HSA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1224  ; HSA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
1225  ; HSA-VI-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
1226  ; HSA-VI-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1227  ; HSA-VI-NEXT:   [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1228  ; HSA-VI-NEXT:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1229  ; HSA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1230  ; HSA-VI-NEXT:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1231  ; HSA-VI-NEXT:   G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1232  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1233  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
1234  ; HSA-VI-NEXT:   G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1235  ; HSA-VI-NEXT:   G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1236  ; HSA-VI-NEXT:   S_ENDPGM 0
1237  ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment
1238  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1239  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1240  ; LEGACY-MESA-VI-NEXT: {{  $}}
1241  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1242  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1243  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1244  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1245  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1246  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1247  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1248  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1249  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1250  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
1251  ; LEGACY-MESA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
1252  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1253  ; LEGACY-MESA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1254  ; LEGACY-MESA-VI-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
1255  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1256  ; LEGACY-MESA-VI-NEXT:   [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1257  ; LEGACY-MESA-VI-NEXT:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1258  ; LEGACY-MESA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1259  ; LEGACY-MESA-VI-NEXT:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1260  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1261  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1262  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
1263  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1264  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1265  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1266  %val0 = extractvalue {i32, i64} %arg0, 0
1267  %val1 = extractvalue {i32, i64} %arg0, 1
1268  %val2 = extractvalue {i32, i64} %arg1, 0
1269  %val3 = extractvalue {i32, i64} %arg1, 1
1270  store volatile i32 %val0, i32 addrspace(1)* null
1271  store volatile i64 %val1, i64 addrspace(1)* null
1272  store volatile i8 %pad, i8 addrspace(1)* null
1273  store volatile i32 %val2, i32 addrspace(1)* null
1274  store volatile i64 %val3, i64 addrspace(1)* null
1275  ret void
1276}
1277
1278define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) {
1279  ; HSA-VI-LABEL: name: pointer_in_struct_argument
1280  ; HSA-VI: bb.1 (%ir-block.0):
1281  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1282  ; HSA-VI-NEXT: {{  $}}
1283  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1284  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1285  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1286  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1287  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1288  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1289  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1290  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1291  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1292  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
1293  ; HSA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
1294  ; HSA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1295  ; HSA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
1296  ; HSA-VI-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
1297  ; HSA-VI-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1298  ; HSA-VI-NEXT:   [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1299  ; HSA-VI-NEXT:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1300  ; HSA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1301  ; HSA-VI-NEXT:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1302  ; HSA-VI-NEXT:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1303  ; HSA-VI-NEXT:   G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
1304  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
1305  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
1306  ; HSA-VI-NEXT:   G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
1307  ; HSA-VI-NEXT:   G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
1308  ; HSA-VI-NEXT:   S_ENDPGM 0
1309  ; LEGACY-MESA-VI-LABEL: name: pointer_in_struct_argument
1310  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1311  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1312  ; LEGACY-MESA-VI-NEXT: {{  $}}
1313  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1314  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1315  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1316  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1317  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1318  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1319  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1320  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1321  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1322  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
1323  ; LEGACY-MESA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
1324  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1325  ; LEGACY-MESA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1326  ; LEGACY-MESA-VI-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
1327  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1328  ; LEGACY-MESA-VI-NEXT:   [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1329  ; LEGACY-MESA-VI-NEXT:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1330  ; LEGACY-MESA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1331  ; LEGACY-MESA-VI-NEXT:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1332  ; LEGACY-MESA-VI-NEXT:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
1333  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
1334  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
1335  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
1336  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
1337  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
1338  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1339  %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0
1340  %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1
1341  %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0
1342  %val3 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 1
1343  store volatile i8 addrspace(3)* %val0, i8 addrspace(3)* addrspace(1)* null
1344  store volatile i8 addrspace(1)* %val1, i8 addrspace(1)* addrspace(1)* null
1345  store volatile i8 %pad, i8 addrspace(1)* null
1346  store volatile i8 addrspace(3)* %val2, i8 addrspace(3)* addrspace(1)* null
1347  store volatile i8 addrspace(1234)* %val3, i8 addrspace(1234)* addrspace(1)* null
1348  ret void
1349}
1350
1351; No padding between i8 and next struct, but round up at end to 4 byte
1352; multiple.
1353define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
1354  ; HSA-VI-LABEL: name: packed_struct_argument_alignment
1355  ; HSA-VI: bb.1 (%ir-block.1):
1356  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1357  ; HSA-VI-NEXT: {{  $}}
1358  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1359  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1360  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1361  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1362  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
1363  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1364  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1365  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
1366  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1367  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
1368  ; HSA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
1369  ; HSA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1370  ; HSA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
1371  ; HSA-VI-NEXT:   [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1372  ; HSA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
1373  ; HSA-VI-NEXT:   G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1374  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1375  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1376  ; HSA-VI-NEXT:   G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1377  ; HSA-VI-NEXT:   S_ENDPGM 0
1378  ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment
1379  ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
1380  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1381  ; LEGACY-MESA-VI-NEXT: {{  $}}
1382  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1383  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1384  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1385  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1386  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
1387  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1388  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1389  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
1390  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1391  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
1392  ; LEGACY-MESA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53
1393  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1394  ; LEGACY-MESA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
1395  ; LEGACY-MESA-VI-NEXT:   [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1396  ; LEGACY-MESA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
1397  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1398  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1399  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
1400  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
1401  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1402  %val0 = extractvalue <{i32, i64}> %arg0, 0
1403  %val1 = extractvalue <{i32, i64}> %arg0, 1
1404  %val2 = extractvalue <{i32, i64}> %arg1, 0
1405  %val3 = extractvalue <{i32, i64}> %arg1, 1
1406  store volatile i32 %val0, i32 addrspace(1)* null
1407  store volatile i64 %val1, i64 addrspace(1)* null
1408  store volatile i32 %val2, i32 addrspace(1)* null
1409  store volatile i64 %val3, i64 addrspace(1)* null
1410  ret void
1411}
1412
1413define amdgpu_kernel void @unused_i32_arg(i32 addrspace(1)* nocapture %out, i32 %unused, i32 %in) nounwind {
1414  ; HSA-VI-LABEL: name: unused_i32_arg
1415  ; HSA-VI: bb.1.entry:
1416  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1417  ; HSA-VI-NEXT: {{  $}}
1418  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1419  ; HSA-VI-NEXT:   S_ENDPGM 0
1420  ; LEGACY-MESA-VI-LABEL: name: unused_i32_arg
1421  ; LEGACY-MESA-VI: bb.1.entry:
1422  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1423  ; LEGACY-MESA-VI-NEXT: {{  $}}
1424  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1425  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1426entry:
1427  ret void
1428}
1429
1430; Byref pointers should only be treated as offsets from kernarg
1431define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) {
1432  ; HSA-VI-LABEL: name: byref_constant_i8_arg
1433  ; HSA-VI: bb.1 (%ir-block.0):
1434  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1435  ; HSA-VI-NEXT: {{  $}}
1436  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1437  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1438  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1439  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1440  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1441  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1442  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4)
1443  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
1444  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1445  ; HSA-VI-NEXT:   S_ENDPGM 0
1446  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i8_arg
1447  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1448  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1449  ; LEGACY-MESA-VI-NEXT: {{  $}}
1450  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1451  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1452  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1453  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1454  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1455  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1456  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4)
1457  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
1458  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1459  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1460  %in = load i8, i8 addrspace(4)* %in.byref
1461  %ext = zext i8 %in to i32
1462  store i32 %ext, i32 addrspace(1)* %out, align 4
1463  ret void
1464}
1465
1466define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) align 2 %in.byref) {
1467  ; HSA-VI-LABEL: name: byref_constant_i16_arg
1468  ; HSA-VI: bb.1 (%ir-block.0):
1469  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1470  ; HSA-VI-NEXT: {{  $}}
1471  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1472  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1473  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1474  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1475  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1476  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1477  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4)
1478  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
1479  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1480  ; HSA-VI-NEXT:   S_ENDPGM 0
1481  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i16_arg
1482  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1483  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1484  ; LEGACY-MESA-VI-NEXT: {{  $}}
1485  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1486  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1487  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1488  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1489  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1490  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1491  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4)
1492  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
1493  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1494  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1495  %in = load i16, i16 addrspace(4)* %in.byref
1496  %ext = zext i16 %in to i32
1497  store i32 %ext, i32 addrspace(1)* %out, align 4
1498  ret void
1499}
1500
1501define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align 4 %in.byref, i32 %after.offset) {
1502  ; HSA-VI-LABEL: name: byref_constant_i32_arg
1503  ; HSA-VI: bb.1 (%ir-block.0):
1504  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1505  ; HSA-VI-NEXT: {{  $}}
1506  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1507  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1508  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1509  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1510  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1511  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1512  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
1513  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1514  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1515  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1516  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1517  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1518  ; HSA-VI-NEXT:   S_ENDPGM 0
1519  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg
1520  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1521  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1522  ; LEGACY-MESA-VI-NEXT: {{  $}}
1523  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1524  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1525  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1526  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1527  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1528  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1529  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
1530  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1531  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1532  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1533  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1534  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1535  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1536  %in = load i32, i32 addrspace(4)* %in.byref
1537  store volatile i32 %in, i32 addrspace(1)* %out, align 4
1538  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1539  ret void
1540}
1541
1542define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) {
1543  ; HSA-VI-LABEL: name: byref_constant_v4i32_arg
1544  ; HSA-VI: bb.1 (%ir-block.0):
1545  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1546  ; HSA-VI-NEXT: {{  $}}
1547  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1548  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1549  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1550  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1551  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1552  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1553  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
1554  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1555  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1556  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4)
1557  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
1558  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1)
1559  ; HSA-VI-NEXT:   S_ENDPGM 0
1560  ; LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg
1561  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1562  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1563  ; LEGACY-MESA-VI-NEXT: {{  $}}
1564  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1565  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1566  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1567  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1568  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1569  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1570  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
1571  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1572  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1573  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4)
1574  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
1575  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1)
1576  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1577  %in = load <4 x i32>, <4 x i32> addrspace(4)* %in.byref
1578  store volatile <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
1579  %out.cast = bitcast <4 x i32> addrspace(1)* %out to i32 addrspace(1)*
1580  store volatile i32 %after.offset, i32 addrspace(1)* %out.cast, align 4
1581  ret void
1582}
1583
1584define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) {
1585  ; HSA-VI-LABEL: name: byref_align_constant_i32_arg
1586  ; HSA-VI: bb.1 (%ir-block.0):
1587  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1588  ; HSA-VI-NEXT: {{  $}}
1589  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1590  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1591  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1592  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1593  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
1594  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1595  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260
1596  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1597  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1598  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1599  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1600  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1601  ; HSA-VI-NEXT:   S_ENDPGM 0
1602  ; LEGACY-MESA-VI-LABEL: name: byref_align_constant_i32_arg
1603  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1604  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1605  ; LEGACY-MESA-VI-NEXT: {{  $}}
1606  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1607  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1608  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1609  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1610  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292
1611  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1612  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296
1613  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1614  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
1615  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1616  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1617  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1618  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1619  %in = load i32, i32 addrspace(4)* %in.byref
1620  store volatile i32 %in, i32 addrspace(1)* %out, align 4
1621  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1622  ret void
1623}
1624
1625define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) {
1626  ; HSA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg
1627  ; HSA-VI: bb.1 (%ir-block.1):
1628  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1629  ; HSA-VI-NEXT: {{  $}}
1630  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1631  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1632  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1633  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1634  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
1635  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1636  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
1637  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1638  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1639  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4)
1640  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1)
1641  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1642  ; HSA-VI-NEXT:   S_ENDPGM 0
1643  ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg
1644  ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
1645  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1646  ; LEGACY-MESA-VI-NEXT: {{  $}}
1647  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1648  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1649  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1650  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1651  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
1652  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1653  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164
1654  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1655  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1656  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4)
1657  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1)
1658  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1659  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1660  %in = load <16 x i32>, <16 x i32> addrspace(4)* %in.byref
1661  %cast.out = bitcast i32 addrspace(1)* %out to <16 x i32> addrspace(1)*
1662  store volatile <16 x i32> %in, <16 x i32> addrspace(1)* %cast.out, align 4
1663  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1664  ret void
1665}
1666
1667; Also accept byref kernel arguments with other global address spaces.
1668define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) align(4) %in.byref) {
1669  ; HSA-VI-LABEL: name: byref_global_i32_arg
1670  ; HSA-VI: bb.1 (%ir-block.0):
1671  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1672  ; HSA-VI-NEXT: {{  $}}
1673  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1674  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1675  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1676  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1677  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1678  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1679  ; HSA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1680  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1)
1681  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1682  ; HSA-VI-NEXT:   S_ENDPGM 0
1683  ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg
1684  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1685  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1686  ; LEGACY-MESA-VI-NEXT: {{  $}}
1687  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1688  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1689  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1690  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1691  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1692  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1693  ; LEGACY-MESA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1694  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1)
1695  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1696  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1697  %in = load i32, i32 addrspace(1)* %in.byref
1698  store i32 %in, i32 addrspace(1)* %out, align 4
1699  ret void
1700}
1701
1702define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) align(4) %in.byref) {
1703  ; HSA-VI-LABEL: name: byref_flat_i32_arg
1704  ; HSA-VI: bb.1 (%ir-block.0):
1705  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1706  ; HSA-VI-NEXT: {{  $}}
1707  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1708  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1709  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1710  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1711  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1712  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1713  ; HSA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1714  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref)
1715  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1716  ; HSA-VI-NEXT:   S_ENDPGM 0
1717  ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg
1718  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1719  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1720  ; LEGACY-MESA-VI-NEXT: {{  $}}
1721  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1722  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1723  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1724  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1725  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1726  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1727  ; LEGACY-MESA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1728  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref)
1729  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1730  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1731  %in = load i32, i32* %in.byref
1732  store i32 %in, i32 addrspace(1)* %out, align 4
1733  ret void
1734}
1735
1736define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) align(4) %in.byref) {
1737  ; HSA-VI-LABEL: name: byref_constant_32bit_i32_arg
1738  ; HSA-VI: bb.1 (%ir-block.0):
1739  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1740  ; HSA-VI-NEXT: {{  $}}
1741  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1742  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1743  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1744  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1745  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1746  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1747  ; HSA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1748  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6)
1749  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1750  ; HSA-VI-NEXT:   S_ENDPGM 0
1751  ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg
1752  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1753  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1754  ; LEGACY-MESA-VI-NEXT: {{  $}}
1755  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1756  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1757  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1758  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1759  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1760  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1761  ; LEGACY-MESA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1762  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6)
1763  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1764  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1765  %in = load i32, i32 addrspace(6)* %in.byref
1766  store i32 %in, i32 addrspace(1)* %out, align 4
1767  ret void
1768}
1769
1770define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref(i32) align(4) %in.byref) {
1771  ; HSA-VI-LABEL: name: byref_unknown_as_i32_arg
1772  ; HSA-VI: bb.1 (%ir-block.0):
1773  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1774  ; HSA-VI-NEXT: {{  $}}
1775  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1776  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1777  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1778  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1779  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1780  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1781  ; HSA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1782  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999)
1783  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1784  ; HSA-VI-NEXT:   S_ENDPGM 0
1785  ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg
1786  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1787  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1788  ; LEGACY-MESA-VI-NEXT: {{  $}}
1789  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1790  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1791  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1792  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1793  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1794  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1795  ; LEGACY-MESA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1796  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999)
1797  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1798  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1799  %in = load i32, i32 addrspace(999)* %in.byref
1800  store i32 %in, i32 addrspace(1)* %out, align 4
1801  ret void
1802}
1803
1804; Invalid, but should not crash.
1805define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(3)* byref(i32) align(4) %in.byref) {
1806  ; HSA-VI-LABEL: name: byref_local_i32_arg
1807  ; HSA-VI: bb.1 (%ir-block.0):
1808  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1809  ; HSA-VI-NEXT: {{  $}}
1810  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1811  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1812  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1813  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1814  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1815  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1816  ; HSA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1817  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3)
1818  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1819  ; HSA-VI-NEXT:   S_ENDPGM 0
1820  ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg
1821  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1822  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1823  ; LEGACY-MESA-VI-NEXT: {{  $}}
1824  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1825  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1826  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1827  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1828  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1829  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1830  ; LEGACY-MESA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1831  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3)
1832  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1833  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1834  %in = load i32, i32 addrspace(3)* %in.byref
1835  store i32 %in, i32 addrspace(1)* %out, align 4
1836  ret void
1837}
1838
1839define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(4) %in0.byref, i32 addrspace(4)* byref(i32) align(4) %in1.byref, i32 %after.offset) {
1840  ; HSA-VI-LABEL: name: multi_byref_constant_i32_arg
1841  ; HSA-VI: bb.1 (%ir-block.0):
1842  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1843  ; HSA-VI-NEXT: {{  $}}
1844  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1845  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1846  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1847  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1848  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1849  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1850  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
1851  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1852  ; HSA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1853  ; HSA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1854  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1855  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4)
1856  ; HSA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4)
1857  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1858  ; HSA-VI-NEXT:   G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1859  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1860  ; HSA-VI-NEXT:   S_ENDPGM 0
1861  ; LEGACY-MESA-VI-LABEL: name: multi_byref_constant_i32_arg
1862  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1863  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1864  ; LEGACY-MESA-VI-NEXT: {{  $}}
1865  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1866  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1867  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1868  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1869  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1870  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1871  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
1872  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1873  ; LEGACY-MESA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1874  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1875  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1876  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4)
1877  ; LEGACY-MESA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4)
1878  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1879  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1880  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1881  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1882  %in0 = load i32, i32 addrspace(4)* %in0.byref
1883  %in1 = load i32, i32 addrspace(4)* %in1.byref
1884  store volatile i32 %in0, i32 addrspace(1)* %out, align 4
1885  store volatile i32 %in1, i32 addrspace(1)* %out, align 4
1886  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1887  ret void
1888}
1889
1890define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) align(4) %in.byref) {
1891  ; HSA-VI-LABEL: name: byref_constant_i32_arg_offset0
1892  ; HSA-VI: bb.1 (%ir-block.0):
1893  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1894  ; HSA-VI-NEXT: {{  $}}
1895  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1896  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1897  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1898  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1899  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1900  ; HSA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1901  ; HSA-VI-NEXT:   S_ENDPGM 0
1902  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0
1903  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1904  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1905  ; LEGACY-MESA-VI-NEXT: {{  $}}
1906  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1907  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1908  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1909  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1910  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
1911  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
1912  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1913  %in = load i32, i32 addrspace(4)* %in.byref
1914  store i32 %in, i32 addrspace(1)* undef, align 4
1915  ret void
1916}
1917
1918define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind {
1919  ; HSA-VI-LABEL: name: p3i8_arg
1920  ; HSA-VI: bb.1 (%ir-block.0):
1921  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1922  ; HSA-VI-NEXT: {{  $}}
1923  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1924  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1925  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1926  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), align 16, addrspace 4)
1927  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1928  ; HSA-VI-NEXT:   G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
1929  ; HSA-VI-NEXT:   S_ENDPGM 0
1930  ; LEGACY-MESA-VI-LABEL: name: p3i8_arg
1931  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1932  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1933  ; LEGACY-MESA-VI-NEXT: {{  $}}
1934  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1935  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1936  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1937  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), addrspace 4)
1938  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1939  ; LEGACY-MESA-VI-NEXT:   G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
1940  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1941  store i8 9, i8 addrspace(3)* %arg, align 4
1942  ret void
1943}
1944
1945define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind {
1946  ; HSA-VI-LABEL: name: p1i8_arg
1947  ; HSA-VI: bb.1 (%ir-block.0):
1948  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1949  ; HSA-VI-NEXT: {{  $}}
1950  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1951  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1952  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
1953  ; HSA-VI-NEXT:   G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
1954  ; HSA-VI-NEXT:   S_ENDPGM 0
1955  ; LEGACY-MESA-VI-LABEL: name: p1i8_arg
1956  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1957  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1958  ; LEGACY-MESA-VI-NEXT: {{  $}}
1959  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1960  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1961  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
1962  ; LEGACY-MESA-VI-NEXT:   G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
1963  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1964  store i8 9, i8 addrspace(3)* null
1965  ret void
1966}
1967
1968define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind {
1969  ; HSA-VI-LABEL: name: v2p1i8_arg
1970  ; HSA-VI: bb.1 (%ir-block.0):
1971  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1972  ; HSA-VI-NEXT: {{  $}}
1973  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1974  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1975  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1976  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4)
1977  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1978  ; HSA-VI-NEXT:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
1979  ; HSA-VI-NEXT:   S_ENDPGM 0
1980  ; LEGACY-MESA-VI-LABEL: name: v2p1i8_arg
1981  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1982  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
1983  ; LEGACY-MESA-VI-NEXT: {{  $}}
1984  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1985  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1986  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1987  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4)
1988  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1989  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
1990  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
1991  store <2 x i8 addrspace(1)*> %arg, <2 x i8 addrspace(1)*> addrspace(1)* undef
1992  ret void
1993}
1994
1995define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind {
1996  ; HSA-VI-LABEL: name: v2p3i8_arg
1997  ; HSA-VI: bb.1 (%ir-block.0):
1998  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
1999  ; HSA-VI-NEXT: {{  $}}
2000  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
2001  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
2002  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
2003  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 16, addrspace 4)
2004  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
2005  ; HSA-VI-NEXT:   G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
2006  ; HSA-VI-NEXT:   S_ENDPGM 0
2007  ; LEGACY-MESA-VI-LABEL: name: v2p3i8_arg
2008  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
2009  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
2010  ; LEGACY-MESA-VI-NEXT: {{  $}}
2011  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
2012  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
2013  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
2014  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4)
2015  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
2016  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
2017  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
2018  store <2 x i8 addrspace(3)*> %arg, <2 x i8 addrspace(3)*> addrspace(1)* undef
2019  ret void
2020}
2021
2022define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) nounwind {
2023  ; HSA-VI-LABEL: name: v2p1i8_in_struct_arg
2024  ; HSA-VI: bb.1 (%ir-block.0):
2025  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
2026  ; HSA-VI-NEXT: {{  $}}
2027  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
2028  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
2029  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
2030  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4)
2031  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
2032  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
2033  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4)
2034  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
2035  ; HSA-VI-NEXT:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
2036  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
2037  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
2038  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
2039  ; HSA-VI-NEXT:   S_ENDPGM 0
2040  ; LEGACY-MESA-VI-LABEL: name: v2p1i8_in_struct_arg
2041  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
2042  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
2043  ; LEGACY-MESA-VI-NEXT: {{  $}}
2044  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
2045  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
2046  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
2047  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
2048  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
2049  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
2050  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
2051  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
2052  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
2053  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
2054  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
2055  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
2056  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
2057  store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef
2058  ret void
2059}
2060