1; RUN: llc < %s -mtriple=nvptx-unknown-unknown | FileCheck %s 2; 3; Check that parameters of a __global__ (kernel) function do not get increased 4; alignment, and no additional vectorization is performed on loads/stores with 5; that parameters. 6; 7; Test IR is a minimized version of IR generated with the following command 8; from the source code below: 9; $ clang++ -O3 --cuda-gpu-arch=sm_35 -S -emit-llvm src.cu 10; 11; ---------------------------------------------------------------------------- 12; #include <stdint.h> 13; 14; struct St4x1 { uint32_t field[1]; }; 15; struct St4x2 { uint32_t field[2]; }; 16; struct St4x3 { uint32_t field[3]; }; 17; struct St4x4 { uint32_t field[4]; }; 18; struct St4x5 { uint32_t field[5]; }; 19; struct St4x6 { uint32_t field[6]; }; 20; struct St4x7 { uint32_t field[7]; }; 21; struct St4x8 { uint32_t field[8]; }; 22; struct St8x1 { uint64_t field[1]; }; 23; struct St8x2 { uint64_t field[2]; }; 24; struct St8x3 { uint64_t field[3]; }; 25; struct St8x4 { uint64_t field[4]; }; 26; 27; #define DECLARE_FUNCTION(StName) \ 28; static __global__ __attribute__((noinline)) \ 29; void foo_##StName(struct StName in, struct StName* ret) { \ 30; const unsigned size = sizeof(ret->field) / sizeof(*ret->field); \ 31; for (unsigned i = 0; i != size; ++i) \ 32; ret->field[i] = in.field[i]; \ 33; } \ 34; 35; DECLARE_FUNCTION(St4x1) 36; DECLARE_FUNCTION(St4x2) 37; DECLARE_FUNCTION(St4x3) 38; DECLARE_FUNCTION(St4x4) 39; DECLARE_FUNCTION(St4x5) 40; DECLARE_FUNCTION(St4x6) 41; DECLARE_FUNCTION(St4x7) 42; DECLARE_FUNCTION(St4x8) 43; DECLARE_FUNCTION(St8x1) 44; DECLARE_FUNCTION(St8x2) 45; DECLARE_FUNCTION(St8x3) 46; DECLARE_FUNCTION(St8x4) 47; ---------------------------------------------------------------------------- 48 49%struct.St4x1 = type { [1 x i32] } 50%struct.St4x2 = type { [2 x i32] } 51%struct.St4x3 = type { [3 x i32] } 52%struct.St4x4 = type { [4 x i32] } 53%struct.St4x5 = type { [5 x i32] } 54%struct.St4x6 = type { [6 x i32] } 55%struct.St4x7 = type { [7 x i32] } 56%struct.St4x8 = type { [8 x i32] } 57%struct.St8x1 = type { [1 x i64] } 58%struct.St8x2 = type { [2 x i64] } 59%struct.St8x3 = type { [3 x i64] } 60%struct.St8x4 = type { [4 x i64] } 61 62define dso_local void @foo_St4x1(%struct.St4x1* nocapture noundef readonly byval(%struct.St4x1) align 4 %in, %struct.St4x1* nocapture noundef writeonly %ret) { 63 ; CHECK-LABEL: .visible .func foo_St4x1( 64 ; CHECK: .param .align 4 .b8 foo_St4x1_param_0[4], 65 ; CHECK: .param .b32 foo_St4x1_param_1 66 ; CHECK: ) 67 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x1_param_1]; 68 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x1_param_0]; 69 ; CHECK: st.u32 [[[R1]]], [[R2]]; 70 ; CHECK: ret; 71 %arrayidx = getelementptr inbounds %struct.St4x1, %struct.St4x1* %in, i64 0, i32 0, i64 0 72 %1 = load i32, i32* %arrayidx, align 4 73 %arrayidx3 = getelementptr inbounds %struct.St4x1, %struct.St4x1* %ret, i64 0, i32 0, i64 0 74 store i32 %1, i32* %arrayidx3, align 4 75 ret void 76} 77 78define dso_local void @foo_St4x2(%struct.St4x2* nocapture noundef readonly byval(%struct.St4x2) align 4 %in, %struct.St4x2* nocapture noundef writeonly %ret) { 79 ; CHECK-LABEL: .visible .func foo_St4x2( 80 ; CHECK: .param .align 4 .b8 foo_St4x2_param_0[8], 81 ; CHECK: .param .b32 foo_St4x2_param_1 82 ; CHECK: ) 83 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x2_param_1]; 84 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x2_param_0]; 85 ; CHECK: st.u32 [[[R1]]], [[R2]]; 86 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x2_param_0+4]; 87 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 88 ; CHECK: ret; 89 %arrayidx = getelementptr inbounds %struct.St4x2, %struct.St4x2* %in, i64 0, i32 0, i64 0 90 %1 = load i32, i32* %arrayidx, align 4 91 %arrayidx3 = getelementptr inbounds %struct.St4x2, %struct.St4x2* %ret, i64 0, i32 0, i64 0 92 store i32 %1, i32* %arrayidx3, align 4 93 %arrayidx.1 = getelementptr inbounds %struct.St4x2, %struct.St4x2* %in, i64 0, i32 0, i64 1 94 %2 = load i32, i32* %arrayidx.1, align 4 95 %arrayidx3.1 = getelementptr inbounds %struct.St4x2, %struct.St4x2* %ret, i64 0, i32 0, i64 1 96 store i32 %2, i32* %arrayidx3.1, align 4 97 ret void 98} 99 100define dso_local void @foo_St4x3(%struct.St4x3* nocapture noundef readonly byval(%struct.St4x3) align 4 %in, %struct.St4x3* nocapture noundef writeonly %ret) { 101 ; CHECK-LABEL: .visible .func foo_St4x3( 102 ; CHECK: .param .align 4 .b8 foo_St4x3_param_0[12], 103 ; CHECK: .param .b32 foo_St4x3_param_1 104 ; CHECK: ) 105 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x3_param_1]; 106 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x3_param_0]; 107 ; CHECK: st.u32 [[[R1]]], [[R2]]; 108 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x3_param_0+4]; 109 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 110 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x3_param_0+8]; 111 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 112 ; CHECK: ret; 113 %arrayidx = getelementptr inbounds %struct.St4x3, %struct.St4x3* %in, i64 0, i32 0, i64 0 114 %1 = load i32, i32* %arrayidx, align 4 115 %arrayidx3 = getelementptr inbounds %struct.St4x3, %struct.St4x3* %ret, i64 0, i32 0, i64 0 116 store i32 %1, i32* %arrayidx3, align 4 117 %arrayidx.1 = getelementptr inbounds %struct.St4x3, %struct.St4x3* %in, i64 0, i32 0, i64 1 118 %2 = load i32, i32* %arrayidx.1, align 4 119 %arrayidx3.1 = getelementptr inbounds %struct.St4x3, %struct.St4x3* %ret, i64 0, i32 0, i64 1 120 store i32 %2, i32* %arrayidx3.1, align 4 121 %arrayidx.2 = getelementptr inbounds %struct.St4x3, %struct.St4x3* %in, i64 0, i32 0, i64 2 122 %3 = load i32, i32* %arrayidx.2, align 4 123 %arrayidx3.2 = getelementptr inbounds %struct.St4x3, %struct.St4x3* %ret, i64 0, i32 0, i64 2 124 store i32 %3, i32* %arrayidx3.2, align 4 125 ret void 126} 127 128define dso_local void @foo_St4x4(%struct.St4x4* nocapture noundef readonly byval(%struct.St4x4) align 4 %in, %struct.St4x4* nocapture noundef writeonly %ret) { 129 ; CHECK-LABEL: .visible .func foo_St4x4( 130 ; CHECK: .param .align 4 .b8 foo_St4x4_param_0[16], 131 ; CHECK: .param .b32 foo_St4x4_param_1 132 ; CHECK: ) 133 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x4_param_1]; 134 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x4_param_0]; 135 ; CHECK: st.u32 [[[R1]]], [[R2]]; 136 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x4_param_0+4]; 137 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 138 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x4_param_0+8]; 139 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 140 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x4_param_0+12]; 141 ; CHECK: st.u32 [[[R1]]+12], [[R5]]; 142 ; CHECK: ret; 143 %arrayidx = getelementptr inbounds %struct.St4x4, %struct.St4x4* %in, i64 0, i32 0, i64 0 144 %1 = load i32, i32* %arrayidx, align 4 145 %arrayidx3 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %ret, i64 0, i32 0, i64 0 146 store i32 %1, i32* %arrayidx3, align 4 147 %arrayidx.1 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %in, i64 0, i32 0, i64 1 148 %2 = load i32, i32* %arrayidx.1, align 4 149 %arrayidx3.1 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %ret, i64 0, i32 0, i64 1 150 store i32 %2, i32* %arrayidx3.1, align 4 151 %arrayidx.2 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %in, i64 0, i32 0, i64 2 152 %3 = load i32, i32* %arrayidx.2, align 4 153 %arrayidx3.2 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %ret, i64 0, i32 0, i64 2 154 store i32 %3, i32* %arrayidx3.2, align 4 155 %arrayidx.3 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %in, i64 0, i32 0, i64 3 156 %4 = load i32, i32* %arrayidx.3, align 4 157 %arrayidx3.3 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %ret, i64 0, i32 0, i64 3 158 store i32 %4, i32* %arrayidx3.3, align 4 159 ret void 160} 161 162define dso_local void @foo_St4x5(%struct.St4x5* nocapture noundef readonly byval(%struct.St4x5) align 4 %in, %struct.St4x5* nocapture noundef writeonly %ret) { 163 ; CHECK-LABEL: .visible .func foo_St4x5( 164 ; CHECK: .param .align 4 .b8 foo_St4x5_param_0[20], 165 ; CHECK: .param .b32 foo_St4x5_param_1 166 ; CHECK: ) 167 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x5_param_1]; 168 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x5_param_0]; 169 ; CHECK: st.u32 [[[R1]]], [[R2]]; 170 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x5_param_0+4]; 171 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 172 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x5_param_0+8]; 173 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 174 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x5_param_0+12]; 175 ; CHECK: st.u32 [[[R1]]+12], [[R5]]; 176 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x5_param_0+16]; 177 ; CHECK: st.u32 [[[R1]]+16], [[R6]]; 178 ; CHECK: ret; 179 %arrayidx = getelementptr inbounds %struct.St4x5, %struct.St4x5* %in, i64 0, i32 0, i64 0 180 %1 = load i32, i32* %arrayidx, align 4 181 %arrayidx3 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %ret, i64 0, i32 0, i64 0 182 store i32 %1, i32* %arrayidx3, align 4 183 %arrayidx.1 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %in, i64 0, i32 0, i64 1 184 %2 = load i32, i32* %arrayidx.1, align 4 185 %arrayidx3.1 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %ret, i64 0, i32 0, i64 1 186 store i32 %2, i32* %arrayidx3.1, align 4 187 %arrayidx.2 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %in, i64 0, i32 0, i64 2 188 %3 = load i32, i32* %arrayidx.2, align 4 189 %arrayidx3.2 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %ret, i64 0, i32 0, i64 2 190 store i32 %3, i32* %arrayidx3.2, align 4 191 %arrayidx.3 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %in, i64 0, i32 0, i64 3 192 %4 = load i32, i32* %arrayidx.3, align 4 193 %arrayidx3.3 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %ret, i64 0, i32 0, i64 3 194 store i32 %4, i32* %arrayidx3.3, align 4 195 %arrayidx.4 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %in, i64 0, i32 0, i64 4 196 %5 = load i32, i32* %arrayidx.4, align 4 197 %arrayidx3.4 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %ret, i64 0, i32 0, i64 4 198 store i32 %5, i32* %arrayidx3.4, align 4 199 ret void 200} 201 202define dso_local void @foo_St4x6(%struct.St4x6* nocapture noundef readonly byval(%struct.St4x6) align 4 %in, %struct.St4x6* nocapture noundef writeonly %ret) { 203 ; CHECK-LABEL: .visible .func foo_St4x6( 204 ; CHECK: .param .align 4 .b8 foo_St4x6_param_0[24], 205 ; CHECK: .param .b32 foo_St4x6_param_1 206 ; CHECK: ) 207 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x6_param_1]; 208 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x6_param_0]; 209 ; CHECK: st.u32 [[[R1]]], [[R2]]; 210 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x6_param_0+4]; 211 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 212 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x6_param_0+8]; 213 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 214 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x6_param_0+12]; 215 ; CHECK: st.u32 [[[R1]]+12], [[R5]]; 216 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x6_param_0+16]; 217 ; CHECK: st.u32 [[[R1]]+16], [[R6]]; 218 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x6_param_0+20]; 219 ; CHECK: st.u32 [[[R1]]+20], [[R7]]; 220 ; CHECK: ret; 221 %arrayidx = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 0 222 %1 = load i32, i32* %arrayidx, align 4 223 %arrayidx3 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 0 224 store i32 %1, i32* %arrayidx3, align 4 225 %arrayidx.1 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 1 226 %2 = load i32, i32* %arrayidx.1, align 4 227 %arrayidx3.1 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 1 228 store i32 %2, i32* %arrayidx3.1, align 4 229 %arrayidx.2 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 2 230 %3 = load i32, i32* %arrayidx.2, align 4 231 %arrayidx3.2 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 2 232 store i32 %3, i32* %arrayidx3.2, align 4 233 %arrayidx.3 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 3 234 %4 = load i32, i32* %arrayidx.3, align 4 235 %arrayidx3.3 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 3 236 store i32 %4, i32* %arrayidx3.3, align 4 237 %arrayidx.4 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 4 238 %5 = load i32, i32* %arrayidx.4, align 4 239 %arrayidx3.4 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 4 240 store i32 %5, i32* %arrayidx3.4, align 4 241 %arrayidx.5 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 5 242 %6 = load i32, i32* %arrayidx.5, align 4 243 %arrayidx3.5 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 5 244 store i32 %6, i32* %arrayidx3.5, align 4 245 ret void 246} 247 248define dso_local void @foo_St4x7(%struct.St4x7* nocapture noundef readonly byval(%struct.St4x7) align 4 %in, %struct.St4x7* nocapture noundef writeonly %ret) { 249 ; CHECK-LABEL: .visible .func foo_St4x7( 250 ; CHECK: .param .align 4 .b8 foo_St4x7_param_0[28], 251 ; CHECK: .param .b32 foo_St4x7_param_1 252 ; CHECK: ) 253 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x7_param_1]; 254 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x7_param_0]; 255 ; CHECK: st.u32 [[[R1]]], [[R2]]; 256 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x7_param_0+4]; 257 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 258 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x7_param_0+8]; 259 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 260 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x7_param_0+12]; 261 ; CHECK: st.u32 [[[R1]]+12], [[R5]]; 262 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x7_param_0+16]; 263 ; CHECK: st.u32 [[[R1]]+16], [[R6]]; 264 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x7_param_0+20]; 265 ; CHECK: st.u32 [[[R1]]+20], [[R7]]; 266 ; CHECK: ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x7_param_0+24]; 267 ; CHECK: st.u32 [[[R1]]+24], [[R8]]; 268 ; CHECK: ret; 269 %arrayidx = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 0 270 %1 = load i32, i32* %arrayidx, align 4 271 %arrayidx3 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 0 272 store i32 %1, i32* %arrayidx3, align 4 273 %arrayidx.1 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 1 274 %2 = load i32, i32* %arrayidx.1, align 4 275 %arrayidx3.1 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 1 276 store i32 %2, i32* %arrayidx3.1, align 4 277 %arrayidx.2 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 2 278 %3 = load i32, i32* %arrayidx.2, align 4 279 %arrayidx3.2 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 2 280 store i32 %3, i32* %arrayidx3.2, align 4 281 %arrayidx.3 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 3 282 %4 = load i32, i32* %arrayidx.3, align 4 283 %arrayidx3.3 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 3 284 store i32 %4, i32* %arrayidx3.3, align 4 285 %arrayidx.4 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 4 286 %5 = load i32, i32* %arrayidx.4, align 4 287 %arrayidx3.4 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 4 288 store i32 %5, i32* %arrayidx3.4, align 4 289 %arrayidx.5 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 5 290 %6 = load i32, i32* %arrayidx.5, align 4 291 %arrayidx3.5 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 5 292 store i32 %6, i32* %arrayidx3.5, align 4 293 %arrayidx.6 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 6 294 %7 = load i32, i32* %arrayidx.6, align 4 295 %arrayidx3.6 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 6 296 store i32 %7, i32* %arrayidx3.6, align 4 297 ret void 298} 299 300define dso_local void @foo_St4x8(%struct.St4x8* nocapture noundef readonly byval(%struct.St4x8) align 4 %in, %struct.St4x8* nocapture noundef writeonly %ret) { 301 ; CHECK-LABEL: .visible .func foo_St4x8( 302 ; CHECK: .param .align 4 .b8 foo_St4x8_param_0[32], 303 ; CHECK: .param .b32 foo_St4x8_param_1 304 ; CHECK: ) 305 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x8_param_1]; 306 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x8_param_0]; 307 ; CHECK: st.u32 [[[R1]]], [[R2]]; 308 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x8_param_0+4]; 309 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 310 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x8_param_0+8]; 311 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 312 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x8_param_0+12]; 313 ; CHECK: st.u32 [[[R1]]+12], [[R5]]; 314 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x8_param_0+16]; 315 ; CHECK: st.u32 [[[R1]]+16], [[R6]]; 316 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x8_param_0+20]; 317 ; CHECK: st.u32 [[[R1]]+20], [[R7]]; 318 ; CHECK: ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x8_param_0+24]; 319 ; CHECK: st.u32 [[[R1]]+24], [[R8]]; 320 ; CHECK: ld.param.u32 [[R9:%r[0-9]+]], [foo_St4x8_param_0+28]; 321 ; CHECK: st.u32 [[[R1]]+28], [[R9]]; 322 ; CHECK: ret; 323 %arrayidx = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 0 324 %1 = load i32, i32* %arrayidx, align 4 325 %arrayidx3 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 0 326 store i32 %1, i32* %arrayidx3, align 4 327 %arrayidx.1 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 1 328 %2 = load i32, i32* %arrayidx.1, align 4 329 %arrayidx3.1 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 1 330 store i32 %2, i32* %arrayidx3.1, align 4 331 %arrayidx.2 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 2 332 %3 = load i32, i32* %arrayidx.2, align 4 333 %arrayidx3.2 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 2 334 store i32 %3, i32* %arrayidx3.2, align 4 335 %arrayidx.3 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 3 336 %4 = load i32, i32* %arrayidx.3, align 4 337 %arrayidx3.3 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 3 338 store i32 %4, i32* %arrayidx3.3, align 4 339 %arrayidx.4 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 4 340 %5 = load i32, i32* %arrayidx.4, align 4 341 %arrayidx3.4 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 4 342 store i32 %5, i32* %arrayidx3.4, align 4 343 %arrayidx.5 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 5 344 %6 = load i32, i32* %arrayidx.5, align 4 345 %arrayidx3.5 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 5 346 store i32 %6, i32* %arrayidx3.5, align 4 347 %arrayidx.6 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 6 348 %7 = load i32, i32* %arrayidx.6, align 4 349 %arrayidx3.6 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 6 350 store i32 %7, i32* %arrayidx3.6, align 4 351 %arrayidx.7 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 7 352 %8 = load i32, i32* %arrayidx.7, align 4 353 %arrayidx3.7 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 7 354 store i32 %8, i32* %arrayidx3.7, align 4 355 ret void 356} 357 358define dso_local void @foo_St8x1(%struct.St8x1* nocapture noundef readonly byval(%struct.St8x1) align 8 %in, %struct.St8x1* nocapture noundef writeonly %ret) { 359 ; CHECK-LABEL: .visible .func foo_St8x1( 360 ; CHECK: .param .align 8 .b8 foo_St8x1_param_0[8], 361 ; CHECK: .param .b32 foo_St8x1_param_1 362 ; CHECK: ) 363 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x1_param_1]; 364 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x1_param_0]; 365 ; CHECK: st.u64 [[[R1]]], [[RD1]]; 366 ; CHECK: ret; 367 %arrayidx = getelementptr inbounds %struct.St8x1, %struct.St8x1* %in, i64 0, i32 0, i64 0 368 %1 = load i64, i64* %arrayidx, align 8 369 %arrayidx3 = getelementptr inbounds %struct.St8x1, %struct.St8x1* %ret, i64 0, i32 0, i64 0 370 store i64 %1, i64* %arrayidx3, align 8 371 ret void 372} 373 374define dso_local void @foo_St8x2(%struct.St8x2* nocapture noundef readonly byval(%struct.St8x2) align 8 %in, %struct.St8x2* nocapture noundef writeonly %ret) { 375 ; CHECK-LABEL: .visible .func foo_St8x2( 376 ; CHECK: .param .align 8 .b8 foo_St8x2_param_0[16], 377 ; CHECK: .param .b32 foo_St8x2_param_1 378 ; CHECK: ) 379 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x2_param_1]; 380 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x2_param_0]; 381 ; CHECK: st.u64 [[[R1]]], [[RD1]]; 382 ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x2_param_0+8]; 383 ; CHECK: st.u64 [[[R1]]+8], [[RD2]]; 384 ; CHECK: ret; 385 %arrayidx = getelementptr inbounds %struct.St8x2, %struct.St8x2* %in, i64 0, i32 0, i64 0 386 %1 = load i64, i64* %arrayidx, align 8 387 %arrayidx3 = getelementptr inbounds %struct.St8x2, %struct.St8x2* %ret, i64 0, i32 0, i64 0 388 store i64 %1, i64* %arrayidx3, align 8 389 %arrayidx.1 = getelementptr inbounds %struct.St8x2, %struct.St8x2* %in, i64 0, i32 0, i64 1 390 %2 = load i64, i64* %arrayidx.1, align 8 391 %arrayidx3.1 = getelementptr inbounds %struct.St8x2, %struct.St8x2* %ret, i64 0, i32 0, i64 1 392 store i64 %2, i64* %arrayidx3.1, align 8 393 ret void 394} 395 396define dso_local void @foo_St8x3(%struct.St8x3* nocapture noundef readonly byval(%struct.St8x3) align 8 %in, %struct.St8x3* nocapture noundef writeonly %ret) { 397 ; CHECK-LABEL: .visible .func foo_St8x3( 398 ; CHECK: .param .align 8 .b8 foo_St8x3_param_0[24], 399 ; CHECK: .param .b32 foo_St8x3_param_1 400 ; CHECK: ) 401 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x3_param_1]; 402 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x3_param_0]; 403 ; CHECK: st.u64 [[[R1]]], [[RD1]]; 404 ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x3_param_0+8]; 405 ; CHECK: st.u64 [[[R1]]+8], [[RD2]]; 406 ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x3_param_0+16]; 407 ; CHECK: st.u64 [[[R1]]+16], [[RD3]]; 408 ; CHECK: ret; 409 %arrayidx = getelementptr inbounds %struct.St8x3, %struct.St8x3* %in, i64 0, i32 0, i64 0 410 %1 = load i64, i64* %arrayidx, align 8 411 %arrayidx3 = getelementptr inbounds %struct.St8x3, %struct.St8x3* %ret, i64 0, i32 0, i64 0 412 store i64 %1, i64* %arrayidx3, align 8 413 %arrayidx.1 = getelementptr inbounds %struct.St8x3, %struct.St8x3* %in, i64 0, i32 0, i64 1 414 %2 = load i64, i64* %arrayidx.1, align 8 415 %arrayidx3.1 = getelementptr inbounds %struct.St8x3, %struct.St8x3* %ret, i64 0, i32 0, i64 1 416 store i64 %2, i64* %arrayidx3.1, align 8 417 %arrayidx.2 = getelementptr inbounds %struct.St8x3, %struct.St8x3* %in, i64 0, i32 0, i64 2 418 %3 = load i64, i64* %arrayidx.2, align 8 419 %arrayidx3.2 = getelementptr inbounds %struct.St8x3, %struct.St8x3* %ret, i64 0, i32 0, i64 2 420 store i64 %3, i64* %arrayidx3.2, align 8 421 ret void 422} 423 424define dso_local void @foo_St8x4(%struct.St8x4* nocapture noundef readonly byval(%struct.St8x4) align 8 %in, %struct.St8x4* nocapture noundef writeonly %ret) { 425 ; CHECK-LABEL: .visible .func foo_St8x4( 426 ; CHECK: .param .align 8 .b8 foo_St8x4_param_0[32], 427 ; CHECK: .param .b32 foo_St8x4_param_1 428 ; CHECK: ) 429 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x4_param_1]; 430 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x4_param_0]; 431 ; CHECK: st.u64 [[[R1]]], [[RD1]]; 432 ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x4_param_0+8]; 433 ; CHECK: st.u64 [[[R1]]+8], [[RD2]]; 434 ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x4_param_0+16]; 435 ; CHECK: st.u64 [[[R1]]+16], [[RD3]]; 436 ; CHECK: ld.param.u64 [[RD4:%rd[0-9]+]], [foo_St8x4_param_0+24]; 437 ; CHECK: st.u64 [[[R1]]+24], [[RD4]]; 438 ; CHECK: ret; 439 %arrayidx = getelementptr inbounds %struct.St8x4, %struct.St8x4* %in, i64 0, i32 0, i64 0 440 %1 = load i64, i64* %arrayidx, align 8 441 %arrayidx3 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %ret, i64 0, i32 0, i64 0 442 store i64 %1, i64* %arrayidx3, align 8 443 %arrayidx.1 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %in, i64 0, i32 0, i64 1 444 %2 = load i64, i64* %arrayidx.1, align 8 445 %arrayidx3.1 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %ret, i64 0, i32 0, i64 1 446 store i64 %2, i64* %arrayidx3.1, align 8 447 %arrayidx.2 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %in, i64 0, i32 0, i64 2 448 %3 = load i64, i64* %arrayidx.2, align 8 449 %arrayidx3.2 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %ret, i64 0, i32 0, i64 2 450 store i64 %3, i64* %arrayidx3.2, align 8 451 %arrayidx.3 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %in, i64 0, i32 0, i64 3 452 %4 = load i64, i64* %arrayidx.3, align 8 453 %arrayidx3.3 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %ret, i64 0, i32 0, i64 3 454 store i64 %4, i64* %arrayidx3.3, align 8 455 ret void 456} 457