; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}atomic_add_i64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_add_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_addr64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_f64_offset(double* %out, double %in) {
entry:
  %gep = getelementptr double, double* %out, i64 4
  %tmp0 = atomicrmw volatile xchg double* %gep, double %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_pointer_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_pointer_offset(i8** %out, i8* %in) {
entry:
  %gep = getelementptr i8*, i8** %out, i32 4
  %val = atomicrmw volatile xchg i8** %gep, i8* %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_offset:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_ret:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_addr64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64_offset(i64* %in, i64* %out) {
entry:
  %gep = getelementptr i64, i64* %in, i64 4
  %val = load atomic i64, i64* %gep seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64(i64* %in, i64* %out) {
entry:
  %val = load atomic i64, i64* %in seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64* %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %in, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = load atomic i64, i64* %gep seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64_addr64(i64* %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %in, i64 %index
  %val = load atomic i64, i64* %ptr seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64_offset:
; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64* %out) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  store atomic i64 %in, i64* %gep seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
define amdgpu_kernel void @atomic_store_i64(i64 %in, i64* %out) {
entry:
  store atomic i64 %in, i64* %out seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  store atomic i64 %in, i64* %gep seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  store atomic i64 %in, i64* %ptr seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64* %out, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64* %out, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 9000
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64(i64* %out, i64 %in, i64 %old) {
entry:
  %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
  %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_load_f64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64_offset(double* %in, double* %out) {
entry:
  %gep = getelementptr double, double* %in, i64 4
  %val = load atomic double, double* %gep seq_cst, align 8
  store double %val, double* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_f64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64(double* %in, double* %out) {
entry:
  %val = load atomic double, double* %in seq_cst, align 8
  store double %val, double* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64_addr64_offset(double* %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %in, i64 %index
  %gep = getelementptr double, double* %ptr, i64 4
  %val = load atomic double, double* %gep seq_cst, align 8
  store double %val, double* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_f64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64_addr64(double* %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %in, i64 %index
  %val = load atomic double, double* %ptr seq_cst, align 8
  store double %val, double* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f64_offset:
; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f64_offset(double %in, double* %out) {
entry:
  %gep = getelementptr double, double* %out, i64 4
  store atomic double %in, double* %gep seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
define amdgpu_kernel void @atomic_store_f64(double %in, double* %out) {
entry:
  store atomic double %in, double* %out seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %out, i64 %index
  %gep = getelementptr double, double* %ptr, i64 4
  store atomic double %in, double* %gep seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f64_addr64(double %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %out, i64 %index
  store atomic double %in, double* %ptr seq_cst, align 8
  ret void
}