1; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s 2; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-atomic-optimizations=false -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s 3; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s 4 5; GCN-LABEL: {{^}}atomic_add_i32_offset: 6; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 7; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} 8define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) { 9entry: 10 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 11 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 12 ret void 13} 14 15; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset: 16; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-4096{{$}} 17define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) { 18entry: 19 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024 20 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 21 ret void 22} 23 24; GCN-LABEL: {{^}}atomic_add_i32_soffset: 25; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0 26; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} 27 28; GFX9: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000{{$}} 29; GFX9: global_atomic_add [[OFFSET]], v{{[0-9]+}}, s{{\[[0-9]:[0-9]+\]}} offset:3232{{$}} 30define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) { 31entry: 32 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000 33 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 34 ret void 35} 36 37; GCN-LABEL: {{^}}atomic_add_i32_huge_offset: 38; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac 39; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd 40; SI: buffer_atomic_add v{{[0-9]+}}, v[[[PTRLO]]:[[PTRHI]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 41 42; VI: flat_atomic_add 43 44; GFX9: s_add_u32 s[[LOW_K:[0-9]+]], s{{[0-9]+}}, 0xdeac 45; GFX9: s_addc_u32 s[[HIGH_K:[0-9]+]], s{{[0-9]+}}, 0xabcd 46; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[[[LOW_K]]:[[HIGH_K]]]{{$}} 47define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) { 48entry: 49 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595 50 51 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 52 ret void 53} 54 55; GCN-LABEL: {{^}}atomic_add_i32_ret_offset: 56; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 57; SIVI: buffer_store_dword [[RET]] 58 59; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}} 60define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 61entry: 62 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 63 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 64 store i32 %val, i32 addrspace(1)* %out2 65 ret void 66} 67 68; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset: 69; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 70; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 71; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 72define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 73entry: 74 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 75 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 76 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 77 ret void 78} 79 80; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset: 81; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 82; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 83; SIVI: buffer_store_dword [[RET]] 84 85; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 86; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s 87define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 88entry: 89 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 90 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 91 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 92 store i32 %val, i32 addrspace(1)* %out2 93 ret void 94} 95 96; GCN-LABEL: {{^}}atomic_add_i32: 97; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 98; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}} 99define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) { 100entry: 101 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst 102 ret void 103} 104 105; GCN-LABEL: {{^}}atomic_add_i32_ret: 106; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 107; SIVI: buffer_store_dword [[RET]] 108 109; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 110; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s 111define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 112entry: 113 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst 114 store i32 %val, i32 addrspace(1)* %out2 115 ret void 116} 117 118; GCN-LABEL: {{^}}atomic_add_i32_addr64: 119; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 120; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 121; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 122define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 123entry: 124 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 125 %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst 126 ret void 127} 128 129; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64: 130; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 131; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 132; SIVI: buffer_store_dword [[RET]] 133 134; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 135define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 136entry: 137 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 138 %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst 139 store i32 %val, i32 addrspace(1)* %out2 140 ret void 141} 142 143; GCN-LABEL: {{^}}atomic_and_i32_offset: 144; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 145 146; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} 147define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) { 148entry: 149 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 150 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst 151 ret void 152} 153 154; GCN-LABEL: {{^}}atomic_and_i32_ret_offset: 155; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 156; SIVI: buffer_store_dword [[RET]] 157 158; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 159define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 160entry: 161 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 162 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst 163 store i32 %val, i32 addrspace(1)* %out2 164 ret void 165} 166 167; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset: 168; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 169; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 170 171; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 172define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 173entry: 174 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 175 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 176 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst 177 ret void 178} 179 180; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset: 181; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 182; VI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 183; SIVI: buffer_store_dword [[RET]] 184 185; GFX9: global_atomic_and [[RET:v[0-9]]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 186define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 187entry: 188 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 189 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 190 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst 191 store i32 %val, i32 addrspace(1)* %out2 192 ret void 193} 194 195; GCN-LABEL: {{^}}atomic_and_i32: 196; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 197 198; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}} 199define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) { 200entry: 201 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst 202 ret void 203} 204 205; GCN-LABEL: {{^}}atomic_and_i32_ret: 206; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 207; SIVI: buffer_store_dword [[RET]] 208 209; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 210define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 211entry: 212 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst 213 store i32 %val, i32 addrspace(1)* %out2 214 ret void 215} 216 217; GCN-LABEL: {{^}}atomic_and_i32_addr64: 218; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 219; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 220 221; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 222define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 223entry: 224 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 225 %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst 226 ret void 227} 228 229; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64: 230; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 231; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 232; SIVI: buffer_store_dword [[RET]] 233 234; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 235define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 236entry: 237 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 238 %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst 239 store i32 %val, i32 addrspace(1)* %out2 240 ret void 241} 242 243; GCN-LABEL: {{^}}atomic_sub_i32_offset: 244; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 245 246; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}} 247define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) { 248entry: 249 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 250 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst 251 ret void 252} 253 254; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset: 255; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 256; SIVI: buffer_store_dword [[RET]] 257 258; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}} 259define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 260entry: 261 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 262 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst 263 store i32 %val, i32 addrspace(1)* %out2 264 ret void 265} 266 267; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset: 268; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 269; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 270 271; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 272define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 273entry: 274 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 275 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 276 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst 277 ret void 278} 279 280; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset: 281; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 282; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 283; SIVI: buffer_store_dword [[RET]] 284 285; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 286define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 287entry: 288 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 289 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 290 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst 291 store i32 %val, i32 addrspace(1)* %out2 292 ret void 293} 294 295; GCN-LABEL: {{^}}atomic_sub_i32: 296; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 297 298; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}} 299define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) { 300entry: 301 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst 302 ret void 303} 304 305; GCN-LABEL: {{^}}atomic_sub_i32_ret: 306; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 307; SIVI: buffer_store_dword [[RET]] 308 309; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 310define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 311entry: 312 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst 313 store i32 %val, i32 addrspace(1)* %out2 314 ret void 315} 316 317; GCN-LABEL: {{^}}atomic_sub_i32_addr64: 318; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 319; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 320 321; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}} 322define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 323entry: 324 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 325 %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst 326 ret void 327} 328 329; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64: 330; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 331; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 332; SIVI: buffer_store_dword [[RET]] 333 334; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 335define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 336entry: 337 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 338 %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst 339 store i32 %val, i32 addrspace(1)* %out2 340 ret void 341} 342 343; GCN-LABEL: {{^}}atomic_max_i32_offset: 344; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 345 346; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} 347define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) { 348entry: 349 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 350 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst 351 ret void 352} 353 354; GCN-LABEL: {{^}}atomic_max_i32_ret_offset: 355; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 356; SIVI: buffer_store_dword [[RET]] 357 358; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 359define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 360entry: 361 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 362 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst 363 store i32 %val, i32 addrspace(1)* %out2 364 ret void 365} 366 367; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset: 368; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 369; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 370 371; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 372define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 373entry: 374 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 375 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 376 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst 377 ret void 378} 379 380; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset: 381; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 382; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 383; SIVI: buffer_store_dword [[RET]] 384 385; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 386define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 387entry: 388 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 389 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 390 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst 391 store i32 %val, i32 addrspace(1)* %out2 392 ret void 393} 394 395; GCN-LABEL: {{^}}atomic_max_i32: 396; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 397 398; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 399define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) { 400entry: 401 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst 402 ret void 403} 404 405; GCN-LABEL: {{^}}atomic_max_i32_ret: 406; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 407; SIVI: buffer_store_dword [[RET]] 408 409; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 410define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 411entry: 412 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst 413 store i32 %val, i32 addrspace(1)* %out2 414 ret void 415} 416 417; GCN-LABEL: {{^}}atomic_max_i32_addr64: 418; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 419; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 420 421; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 422define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 423entry: 424 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 425 %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst 426 ret void 427} 428 429; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64: 430; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 431; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 432; SIVI: buffer_store_dword [[RET]] 433 434; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 435define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 436entry: 437 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 438 %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst 439 store i32 %val, i32 addrspace(1)* %out2 440 ret void 441} 442 443; GCN-LABEL: {{^}}atomic_umax_i32_offset: 444; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 445 446; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 447define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) { 448entry: 449 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 450 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst 451 ret void 452} 453 454; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset: 455; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 456; SIVI: buffer_store_dword [[RET]] 457 458; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 459define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 460entry: 461 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 462 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst 463 store i32 %val, i32 addrspace(1)* %out2 464 ret void 465} 466 467; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset: 468; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 469; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 470; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 471define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 472entry: 473 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 474 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 475 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst 476 ret void 477} 478 479; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset: 480; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 481; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 482; SIVI: buffer_store_dword [[RET]] 483 484; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 485define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 486entry: 487 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 488 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 489 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst 490 store i32 %val, i32 addrspace(1)* %out2 491 ret void 492} 493 494; GCN-LABEL: {{^}}atomic_umax_i32: 495; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 496 497; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 498define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) { 499entry: 500 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst 501 ret void 502} 503 504; GCN-LABEL: {{^}}atomic_umax_i32_ret: 505; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 506; SIVI: buffer_store_dword [[RET]] 507 508; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 509define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 510entry: 511 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst 512 store i32 %val, i32 addrspace(1)* %out2 513 ret void 514} 515 516; GCN-LABEL: {{^}}atomic_umax_i32_addr64: 517; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 518; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 519; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 520define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 521entry: 522 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 523 %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst 524 ret void 525} 526 527; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64: 528; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 529; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 530; SIVI: buffer_store_dword [[RET]] 531 532; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 533define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 534entry: 535 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 536 %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst 537 store i32 %val, i32 addrspace(1)* %out2 538 ret void 539} 540 541; GCN-LABEL: {{^}}atomic_min_i32_offset: 542; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 543 544; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 545define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) { 546entry: 547 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 548 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst 549 ret void 550} 551 552; GCN-LABEL: {{^}}atomic_min_i32_ret_offset: 553; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 554; SIVI: buffer_store_dword [[RET]] 555 556; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 557define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 558entry: 559 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 560 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst 561 store i32 %val, i32 addrspace(1)* %out2 562 ret void 563} 564 565; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset: 566; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 567; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 568; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 569define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 570entry: 571 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 572 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 573 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst 574 ret void 575} 576 577; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset: 578; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 579; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 580; SIVI: buffer_store_dword [[RET]] 581 582; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 583define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 584entry: 585 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 586 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 587 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst 588 store i32 %val, i32 addrspace(1)* %out2 589 ret void 590} 591 592; GCN-LABEL: {{^}}atomic_min_i32: 593; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 594 595; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}} 596define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) { 597entry: 598 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst 599 ret void 600} 601 602; GCN-LABEL: {{^}}atomic_min_i32_ret: 603; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 604; SIVI: buffer_store_dword [[RET]] 605 606; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 607define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 608entry: 609 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst 610 store i32 %val, i32 addrspace(1)* %out2 611 ret void 612} 613 614; GCN-LABEL: {{^}}atomic_min_i32_addr64: 615; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 616; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 617; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 618define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 619entry: 620 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 621 %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst 622 ret void 623} 624 625; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64: 626; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 627; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 628; SIVI: buffer_store_dword [[RET]] 629 630; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 631define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 632entry: 633 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 634 %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst 635 store i32 %val, i32 addrspace(1)* %out2 636 ret void 637} 638 639; GCN-LABEL: {{^}}atomic_umin_i32_offset: 640; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 641 642; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 643define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) { 644entry: 645 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 646 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst 647 ret void 648} 649 650; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset: 651; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 652; SIVI: buffer_store_dword [[RET]] 653 654; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 655define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 656entry: 657 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 658 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst 659 store i32 %val, i32 addrspace(1)* %out2 660 ret void 661} 662 663; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset: 664; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 665; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 666; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 667define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 668entry: 669 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 670 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 671 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst 672 ret void 673} 674 675; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset: 676; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 677; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 678; SIVI: buffer_store_dword [[RET]] 679 680; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 681define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 682entry: 683 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 684 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 685 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst 686 store i32 %val, i32 addrspace(1)* %out2 687 ret void 688} 689 690; GCN-LABEL: {{^}}atomic_umin_i32: 691; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 692; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 693define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) { 694entry: 695 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst 696 ret void 697} 698 699; GCN-LABEL: {{^}}atomic_umin_i32_ret: 700; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 701; SIVI: buffer_store_dword [[RET]] 702 703; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 704define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 705entry: 706 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst 707 store i32 %val, i32 addrspace(1)* %out2 708 ret void 709} 710 711; GCN-LABEL: {{^}}atomic_umin_i32_addr64: 712; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 713; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 714; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 715define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 716entry: 717 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 718 %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst 719 ret void 720} 721 722; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64: 723; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 724; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 725; SIVI: buffer_store_dword [[RET]] 726 727; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 728define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 729entry: 730 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 731 %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst 732 store i32 %val, i32 addrspace(1)* %out2 733 ret void 734} 735 736; GCN-LABEL: {{^}}atomic_or_i32_offset: 737; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 738 739; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 740define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) { 741entry: 742 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 743 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst 744 ret void 745} 746 747; GCN-LABEL: {{^}}atomic_or_i32_ret_offset: 748; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 749; SIVI: buffer_store_dword [[RET]] 750 751; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 752define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 753entry: 754 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 755 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst 756 store i32 %val, i32 addrspace(1)* %out2 757 ret void 758} 759 760; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset: 761; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 762; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 763; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 764define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 765entry: 766 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 767 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 768 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst 769 ret void 770} 771 772; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset: 773; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 774; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 775; SIVI: buffer_store_dword [[RET]] 776 777; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 778define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 779entry: 780 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 781 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 782 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst 783 store i32 %val, i32 addrspace(1)* %out2 784 ret void 785} 786 787; GCN-LABEL: {{^}}atomic_or_i32: 788; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 789 790; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 791define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) { 792entry: 793 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst 794 ret void 795} 796 797; GCN-LABEL: {{^}}atomic_or_i32_ret: 798; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 799; SIVI: buffer_store_dword [[RET]] 800 801; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 802define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 803entry: 804 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst 805 store i32 %val, i32 addrspace(1)* %out2 806 ret void 807} 808 809; GCN-LABEL: {{^}}atomic_or_i32_addr64: 810; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 811; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 812; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 813define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 814entry: 815 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 816 %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst 817 ret void 818} 819 820; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64: 821; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 822; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 823; SIVI: buffer_store_dword [[RET]] 824 825; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 826define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 827entry: 828 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 829 %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst 830 store i32 %val, i32 addrspace(1)* %out2 831 ret void 832} 833 834; GCN-LABEL: {{^}}atomic_xchg_i32_offset: 835; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 836 837; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 838define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) { 839entry: 840 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 841 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst 842 ret void 843} 844 845; GCN-LABEL: {{^}}atomic_xchg_f32_offset: 846; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 847 848; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 849define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) { 850entry: 851 %gep = getelementptr float, float addrspace(1)* %out, i64 4 852 %val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst 853 ret void 854} 855 856; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset: 857; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 858; SIVI: buffer_store_dword [[RET]] 859 860; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 861define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 862entry: 863 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 864 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst 865 store i32 %val, i32 addrspace(1)* %out2 866 ret void 867} 868 869; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset: 870; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 871; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 872; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 873define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 874entry: 875 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 876 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 877 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst 878 ret void 879} 880 881; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset: 882; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 883; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 884; SIVI: buffer_store_dword [[RET]] 885 886; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 887define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 888entry: 889 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 890 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 891 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst 892 store i32 %val, i32 addrspace(1)* %out2 893 ret void 894} 895 896; GCN-LABEL: {{^}}atomic_xchg_i32: 897; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 898; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 899define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) { 900entry: 901 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst 902 ret void 903} 904 905; GCN-LABEL: {{^}}atomic_xchg_i32_ret: 906; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 907; SIVI: buffer_store_dword [[RET]] 908 909; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 910define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 911entry: 912 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst 913 store i32 %val, i32 addrspace(1)* %out2 914 ret void 915} 916 917; GCN-LABEL: {{^}}atomic_xchg_i32_addr64: 918; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 919; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 920; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 921define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 922entry: 923 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 924 %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst 925 ret void 926} 927 928; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64: 929; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 930; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 931; SIVI: buffer_store_dword [[RET]] 932 933; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 934define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 935entry: 936 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 937 %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst 938 store i32 %val, i32 addrspace(1)* %out2 939 ret void 940} 941 942; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset: 943; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 944 945; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 946define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) { 947entry: 948 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 949 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 950 ret void 951} 952 953; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset: 954; SIVI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 955; SIVI: buffer_store_dword v[[RET]] 956 957; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 958define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) { 959entry: 960 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 961 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 962 %extract0 = extractvalue { i32, i1 } %val, 0 963 store i32 %extract0, i32 addrspace(1)* %out2 964 ret void 965} 966 967; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset: 968; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 969 970; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} 971; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} 972define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) { 973entry: 974 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 975 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 976 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 977 ret void 978} 979 980; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset: 981; SI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 982; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 983; SIVI: buffer_store_dword v[[RET]] 984 985; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 986define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) { 987entry: 988 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 989 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 990 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 991 %extract0 = extractvalue { i32, i1 } %val, 0 992 store i32 %extract0, i32 addrspace(1)* %out2 993 ret void 994} 995 996; GCN-LABEL: {{^}}atomic_cmpxchg_i32: 997; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 998 999; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}} 1000define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) { 1001entry: 1002 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst 1003 ret void 1004} 1005 1006; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret: 1007; SIVI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 1008; SIVI: buffer_store_dword v[[RET]] 1009 1010; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 1011define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) { 1012entry: 1013 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst 1014 %extract0 = extractvalue { i32, i1 } %val, 0 1015 store i32 %extract0, i32 addrspace(1)* %out2 1016 ret void 1017} 1018 1019; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64: 1020; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 1021; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} 1022; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}} 1023define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) { 1024entry: 1025 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1026 %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst 1027 ret void 1028} 1029 1030; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64: 1031; SI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 1032; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1033; SIVI: buffer_store_dword v[[RET]] 1034 1035; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}} 1036define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) { 1037entry: 1038 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1039 %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst 1040 %extract0 = extractvalue { i32, i1 } %val, 0 1041 store i32 %extract0, i32 addrspace(1)* %out2 1042 ret void 1043} 1044 1045; GCN-LABEL: {{^}}atomic_xor_i32_offset: 1046; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 1047 1048; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} 1049define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) { 1050entry: 1051 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 1052 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst 1053 ret void 1054} 1055 1056; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset: 1057; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 1058; SIVI: buffer_store_dword [[RET]] 1059 1060; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 1061define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 1062entry: 1063 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 1064 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst 1065 store i32 %val, i32 addrspace(1)* %out2 1066 ret void 1067} 1068 1069; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset: 1070; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 1071; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 1072; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}} 1073define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 1074entry: 1075 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1076 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 1077 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst 1078 ret void 1079} 1080 1081; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: 1082; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 1083; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 1084; SIVI: buffer_store_dword [[RET]] 1085 1086; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 1087define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 1088entry: 1089 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1090 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 1091 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst 1092 store i32 %val, i32 addrspace(1)* %out2 1093 ret void 1094} 1095 1096; GCN-LABEL: {{^}}atomic_xor_i32: 1097; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 1098; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 1099define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) { 1100entry: 1101 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst 1102 ret void 1103} 1104 1105; GCN-LABEL: {{^}}atomic_xor_i32_ret: 1106; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 1107; SIVI: buffer_store_dword [[RET]] 1108 1109; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}} 1110define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 1111entry: 1112 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst 1113 store i32 %val, i32 addrspace(1)* %out2 1114 ret void 1115} 1116 1117; GCN-LABEL: {{^}}atomic_xor_i32_addr64: 1118; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 1119; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 1120; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}} 1121define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 1122entry: 1123 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1124 %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst 1125 ret void 1126} 1127 1128; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64: 1129; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 1130; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 1131; SIVI: buffer_store_dword [[RET]] 1132 1133; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 1134define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 1135entry: 1136 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1137 %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst 1138 store i32 %val, i32 addrspace(1)* %out2 1139 ret void 1140} 1141 1142; GCN-LABEL: {{^}}atomic_load_i32_offset: 1143; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 1144; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 1145; SIVI: buffer_store_dword [[RET]] 1146 1147; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} 1148define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 1149entry: 1150 %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4 1151 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4 1152 store i32 %val, i32 addrspace(1)* %out 1153 ret void 1154} 1155 1156; GCN-LABEL: {{^}}atomic_load_i32_negoffset: 1157; SI: buffer_load_dword [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 1158 1159; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00 1160; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1 1161; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 1162 1163; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}} 1164define amdgpu_kernel void @atomic_load_i32_negoffset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 1165entry: 1166 %gep = getelementptr i32, i32 addrspace(1)* %in, i64 -128 1167 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4 1168 store i32 %val, i32 addrspace(1)* %out 1169 ret void 1170} 1171 1172; GCN-LABEL: {{^}}atomic_load_f32_offset: 1173; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 1174; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 1175; SIVI: buffer_store_dword [[RET]] 1176 1177; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} 1178define amdgpu_kernel void @atomic_load_f32_offset(float addrspace(1)* %in, float addrspace(1)* %out) { 1179entry: 1180 %gep = getelementptr float, float addrspace(1)* %in, i64 4 1181 %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4 1182 store float %val, float addrspace(1)* %out 1183 ret void 1184} 1185 1186; GCN-LABEL: {{^}}atomic_load_i32: 1187; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 1188; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc 1189; SIVI: buffer_store_dword [[RET]] 1190 1191; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc 1192define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 1193entry: 1194 %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4 1195 store i32 %val, i32 addrspace(1)* %out 1196 ret void 1197} 1198 1199; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset: 1200; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 1201; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1202; SIVI: buffer_store_dword [[RET]] 1203 1204; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 1205define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) { 1206entry: 1207 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index 1208 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 1209 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4 1210 store i32 %val, i32 addrspace(1)* %out 1211 ret void 1212} 1213 1214; GCN-LABEL: {{^}}atomic_load_i32_addr64: 1215; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 1216; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1217; SIVI: buffer_store_dword [[RET]] 1218 1219; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}} 1220define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) { 1221entry: 1222 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index 1223 %val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4 1224 store i32 %val, i32 addrspace(1)* %out 1225 ret void 1226} 1227 1228; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset: 1229; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 1230; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1231; SIVI: buffer_store_dword [[RET]] 1232 1233; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 1234define amdgpu_kernel void @atomic_load_f32_addr64_offset(float addrspace(1)* %in, float addrspace(1)* %out, i64 %index) { 1235entry: 1236 %ptr = getelementptr float, float addrspace(1)* %in, i64 %index 1237 %gep = getelementptr float, float addrspace(1)* %ptr, i64 4 1238 %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4 1239 store float %val, float addrspace(1)* %out 1240 ret void 1241} 1242 1243; GCN-LABEL: {{^}}atomic_store_i32_offset: 1244; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 1245; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1246; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}} 1247define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) { 1248entry: 1249 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 1250 store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4 1251 ret void 1252} 1253 1254; GCN-LABEL: {{^}}atomic_store_i32: 1255; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 1256; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1257; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}} 1258define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) { 1259entry: 1260 store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4 1261 ret void 1262} 1263 1264; GCN-LABEL: {{^}}atomic_store_f32: 1265; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 1266; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1267; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}} 1268define amdgpu_kernel void @atomic_store_f32(float %in, float addrspace(1)* %out) { 1269entry: 1270 store atomic float %in, float addrspace(1)* %out seq_cst, align 4 1271 ret void 1272} 1273 1274; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset: 1275; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 1276; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1277; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} 1278define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) { 1279entry: 1280 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1281 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 1282 store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4 1283 ret void 1284} 1285 1286; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset: 1287; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 1288; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1289; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} 1290define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float addrspace(1)* %out, i64 %index) { 1291entry: 1292 %ptr = getelementptr float, float addrspace(1)* %out, i64 %index 1293 %gep = getelementptr float, float addrspace(1)* %ptr, i64 4 1294 store atomic float %in, float addrspace(1)* %gep seq_cst, align 4 1295 ret void 1296} 1297 1298; GCN-LABEL: {{^}}atomic_store_i32_addr64: 1299; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 1300; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1301; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}} 1302define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) { 1303entry: 1304 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1305 store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4 1306 ret void 1307} 1308 1309; GCN-LABEL: {{^}}atomic_store_f32_addr64: 1310; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 1311; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1312; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}} 1313define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float addrspace(1)* %out, i64 %index) { 1314entry: 1315 %ptr = getelementptr float, float addrspace(1)* %out, i64 %index 1316 store atomic float %in, float addrspace(1)* %ptr seq_cst, align 4 1317 ret void 1318} 1319 1320; GCN-LABEL: {{^}}atomic_load_i8_offset: 1321; SIVI: buffer_load_ubyte [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 1322; SIVI: buffer_store_byte [[RET]] 1323 1324; GFX9: global_load_ubyte [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} 1325define amdgpu_kernel void @atomic_load_i8_offset(i8 addrspace(1)* %in, i8 addrspace(1)* %out) { 1326entry: 1327 %gep = getelementptr i8, i8 addrspace(1)* %in, i64 16 1328 %val = load atomic i8, i8 addrspace(1)* %gep seq_cst, align 1 1329 store i8 %val, i8 addrspace(1)* %out 1330 ret void 1331} 1332 1333; GCN-LABEL: {{^}}atomic_load_i8_negoffset: 1334; SI: buffer_load_ubyte [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 1335 1336; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00 1337; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1 1338; VI: flat_load_ubyte [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 1339 1340; GFX9: global_load_ubyte [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}} 1341define amdgpu_kernel void @atomic_load_i8_negoffset(i8 addrspace(1)* %in, i8 addrspace(1)* %out) { 1342entry: 1343 %gep = getelementptr i8, i8 addrspace(1)* %in, i64 -512 1344 %val = load atomic i8, i8 addrspace(1)* %gep seq_cst, align 1 1345 store i8 %val, i8 addrspace(1)* %out 1346 ret void 1347} 1348 1349; GCN-LABEL: {{^}}atomic_store_i8_offset: 1350; SI: buffer_store_byte {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 1351; VI: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1352; GFX9: global_store_byte {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}} 1353define amdgpu_kernel void @atomic_store_i8_offset(i8 %in, i8 addrspace(1)* %out) { 1354entry: 1355 %gep = getelementptr i8, i8 addrspace(1)* %out, i64 16 1356 store atomic i8 %in, i8 addrspace(1)* %gep seq_cst, align 1 1357 ret void 1358} 1359 1360; GCN-LABEL: {{^}}atomic_store_i8: 1361; SI: buffer_store_byte {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 1362; VI: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1363; GFX9: global_store_byte {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}} 1364define amdgpu_kernel void @atomic_store_i8(i8 %in, i8 addrspace(1)* %out) { 1365entry: 1366 store atomic i8 %in, i8 addrspace(1)* %out seq_cst, align 1 1367 ret void 1368} 1369 1370; GCN-LABEL: {{^}}atomic_load_i16_offset: 1371; SIVI: buffer_load_ushort [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 1372; SIVI: buffer_store_short [[RET]] 1373 1374; GFX9: global_load_ushort [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} 1375define amdgpu_kernel void @atomic_load_i16_offset(i16 addrspace(1)* %in, i16 addrspace(1)* %out) { 1376entry: 1377 %gep = getelementptr i16, i16 addrspace(1)* %in, i64 8 1378 %val = load atomic i16, i16 addrspace(1)* %gep seq_cst, align 2 1379 store i16 %val, i16 addrspace(1)* %out 1380 ret void 1381} 1382 1383; GCN-LABEL: {{^}}atomic_load_i16_negoffset: 1384; SI: buffer_load_ushort [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 1385 1386; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00 1387; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1 1388; VI: flat_load_ushort [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 1389 1390; GFX9: global_load_ushort [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}} 1391define amdgpu_kernel void @atomic_load_i16_negoffset(i16 addrspace(1)* %in, i16 addrspace(1)* %out) { 1392entry: 1393 %gep = getelementptr i16, i16 addrspace(1)* %in, i64 -256 1394 %val = load atomic i16, i16 addrspace(1)* %gep seq_cst, align 2 1395 store i16 %val, i16 addrspace(1)* %out 1396 ret void 1397} 1398 1399; GCN-LABEL: {{^}}atomic_store_i16_offset: 1400; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 1401; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1402; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}} 1403define amdgpu_kernel void @atomic_store_i16_offset(i16 %in, i16 addrspace(1)* %out) { 1404entry: 1405 %gep = getelementptr i16, i16 addrspace(1)* %out, i64 8 1406 store atomic i16 %in, i16 addrspace(1)* %gep seq_cst, align 2 1407 ret void 1408} 1409 1410; GCN-LABEL: {{^}}atomic_store_i16: 1411; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 1412; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1413; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}} 1414define amdgpu_kernel void @atomic_store_i16(i16 %in, i16 addrspace(1)* %out) { 1415entry: 1416 store atomic i16 %in, i16 addrspace(1)* %out seq_cst, align 2 1417 ret void 1418} 1419 1420; GCN-LABEL: {{^}}atomic_store_f16_offset: 1421; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 1422; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1423; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}} 1424define amdgpu_kernel void @atomic_store_f16_offset(half %in, half addrspace(1)* %out) { 1425entry: 1426 %gep = getelementptr half, half addrspace(1)* %out, i64 8 1427 store atomic half %in, half addrspace(1)* %gep seq_cst, align 2 1428 ret void 1429} 1430 1431; GCN-LABEL: {{^}}atomic_store_f16: 1432; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 1433; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1434; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}} 1435define amdgpu_kernel void @atomic_store_f16(half %in, half addrspace(1)* %out) { 1436entry: 1437 store atomic half %in, half addrspace(1)* %out seq_cst, align 2 1438 ret void 1439} 1440