; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Tests selection of flat_* 64-bit atomic instructions (rmw, load, store,
; cmpxchg) on CI (bonaire) and VI (tonga). Each operation is checked with and
; without a returned value (glc), and with constant-offset / register-indexed
; addressing. Register-range patterns consistently use [0-9]+ so they also
; match two-digit VGPR numbers.

; GCN-LABEL: {{^}}atomic_add_i64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_add_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_addr64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_f64_offset(double* %out, double %in) {
entry:
  %gep = getelementptr double, double* %out, i64 4
  %tmp0 = atomicrmw volatile xchg double* %gep, double %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_offset:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_ret:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_addr64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; Note: the load/store patterns below previously wrote the second register of
; the range as [0-9] (no +), which only matched single-digit VGPRs; they now
; use [0-9]+ like every other pattern in this file.

; GCN-LABEL: {{^}}atomic_load_i64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64_offset(i64* %in, i64* %out) {
entry:
  %gep = getelementptr i64, i64* %in, i64 4
  %val = load atomic i64, i64* %gep seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64(i64* %in, i64* %out) {
entry:
  %val = load atomic i64, i64* %in seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64* %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %in, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = load atomic i64, i64* %gep seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64_addr64(i64* %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %in, i64 %index
  %val = load atomic i64, i64* %ptr seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64_offset:
; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64* %out) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  store atomic i64 %in, i64* %gep seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
define amdgpu_kernel void @atomic_store_i64(i64 %in, i64* %out) {
entry:
  store atomic i64 %in, i64* %out seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  store atomic i64 %in, i64* %gep seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  store atomic i64 %in, i64* %ptr seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64* %out, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64* %out, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 9000
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}],
v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 913; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]: 914define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in, i64 %old) { 915entry: 916 %gep = getelementptr i64, i64* %out, i64 4 917 %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst 918 %extract0 = extractvalue { i64, i1 } %val, 0 919 store i64 %extract0, i64* %out2 920 ret void 921} 922 923; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset: 924; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} 925define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index, i64 %old) { 926entry: 927 %ptr = getelementptr i64, i64* %out, i64 %index 928 %gep = getelementptr i64, i64* %ptr, i64 4 929 %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst 930 ret void 931} 932 933; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset: 934; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 935; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]: 936define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) { 937entry: 938 %ptr = getelementptr i64, i64* %out, i64 %index 939 %gep = getelementptr i64, i64* %ptr, i64 4 940 %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst 941 %extract0 = extractvalue { i64, i1 } %val, 0 942 store i64 %extract0, i64* %out2 943 ret void 944} 945 946; GCN-LABEL: {{^}}atomic_cmpxchg_i64: 947; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} 948define amdgpu_kernel void @atomic_cmpxchg_i64(i64* %out, i64 %in, i64 %old) { 949entry: 950 %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst 951 ret void 952} 953 954; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret: 955; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, 
v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 956; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]: 957define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64* %out, i64* %out2, i64 %in, i64 %old) { 958entry: 959 %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst 960 %extract0 = extractvalue { i64, i1 } %val, 0 961 store i64 %extract0, i64* %out2 962 ret void 963} 964 965; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64: 966; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} 967define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64* %out, i64 %in, i64 %index, i64 %old) { 968entry: 969 %ptr = getelementptr i64, i64* %out, i64 %index 970 %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst 971 ret void 972} 973 974; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64: 975; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 976; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]: 977define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) { 978entry: 979 %ptr = getelementptr i64, i64* %out, i64 %index 980 %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst 981 %extract0 = extractvalue { i64, i1 } %val, 0 982 store i64 %extract0, i64* %out2 983 ret void 984} 985 986; GCN-LABEL: {{^}}atomic_load_f64_offset: 987; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 988; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 989define amdgpu_kernel void @atomic_load_f64_offset(double* %in, double* %out) { 990entry: 991 %gep = getelementptr double, double* %in, i64 4 992 %val = load atomic double, double* %gep seq_cst, align 8 993 store double %val, double* %out 994 ret void 995} 996 997; GCN-LABEL: {{^}}atomic_load_f64: 998; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc 999; GCN: 
flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 1000define amdgpu_kernel void @atomic_load_f64(double* %in, double* %out) { 1001entry: 1002 %val = load atomic double, double* %in seq_cst, align 8 1003 store double %val, double* %out 1004 ret void 1005} 1006 1007; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset: 1008; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1009; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 1010define amdgpu_kernel void @atomic_load_f64_addr64_offset(double* %in, double* %out, i64 %index) { 1011entry: 1012 %ptr = getelementptr double, double* %in, i64 %index 1013 %gep = getelementptr double, double* %ptr, i64 4 1014 %val = load atomic double, double* %gep seq_cst, align 8 1015 store double %val, double* %out 1016 ret void 1017} 1018 1019; GCN-LABEL: {{^}}atomic_load_f64_addr64: 1020; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1021; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 1022define amdgpu_kernel void @atomic_load_f64_addr64(double* %in, double* %out, i64 %index) { 1023entry: 1024 %ptr = getelementptr double, double* %in, i64 %index 1025 %val = load atomic double, double* %ptr seq_cst, align 8 1026 store double %val, double* %out 1027 ret void 1028} 1029 1030; GCN-LABEL: {{^}}atomic_store_f64_offset: 1031; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} 1032define amdgpu_kernel void @atomic_store_f64_offset(double %in, double* %out) { 1033entry: 1034 %gep = getelementptr double, double* %out, i64 4 1035 store atomic double %in, double* %gep seq_cst, align 8 1036 ret void 1037} 1038 1039; GCN-LABEL: {{^}}atomic_store_f64: 1040; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] 1041define amdgpu_kernel void @atomic_store_f64(double %in, double* %out) { 1042entry: 1043 store atomic double %in, double* %out seq_cst, align 8 1044 ret void 1045} 1046 1047; GCN-LABEL: 
{{^}}atomic_store_f64_addr64_offset: 1048; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}} 1049define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double* %out, i64 %index) { 1050entry: 1051 %ptr = getelementptr double, double* %out, i64 %index 1052 %gep = getelementptr double, double* %ptr, i64 4 1053 store atomic double %in, double* %gep seq_cst, align 8 1054 ret void 1055} 1056 1057; GCN-LABEL: {{^}}atomic_store_f64_addr64: 1058; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}} 1059define amdgpu_kernel void @atomic_store_f64_addr64(double %in, double* %out, i64 %index) { 1060entry: 1061 %ptr = getelementptr double, double* %out, i64 %index 1062 store atomic double %in, double* %ptr seq_cst, align 8 1063 ret void 1064} 1065