; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-atomic-optimizations=true -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7LESS %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8MORE %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8MORE %s

declare i32 @llvm.amdgcn.workitem.id.x()

@local_var32 = addrspace(3) global i32 undef, align 4
@local_var64 = addrspace(3) global i64 undef, align 8

; Show what the atomic optimization pass will do for local pointers.
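;
; In broad strokes (this summarizes the CHECK patterns below; it is not a
; specification of the pass): when all active lanes of a wavefront perform
; the same atomic RMW on the same LDS address, the pass computes a combined
; operand once, lets a single lane issue the DS atomic, and rebuilds each
; lane's own result from the returned old value. The v_mbcnt_lo/hi pair
; numbers the active lanes (the first lane is picked out by comparing that
; count against 0) and s_bcnt1_i32_b64 counts them, so for a uniform
; add/sub operand the combined operand is expected to be the popcount
; times the operand. For a divergent operand on GFX8+ the combined operand
; is instead expected to come from a DPP-based wavefront reduction whose
; total is read back from lane 63 with v_readlane_b32.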

; GCN-LABEL: add_i32_constant:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
; GCN: v_mul_u32_u24{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[popcount]], 5
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
entry:
  %old = atomicrmw add i32 addrspace(3)* @local_var32, i32 5 acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: add_i32_uniform:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
; GCN: s_mul_i32 s[[scalar_value:[0-9]+]], s{{[0-9]+}}, s[[popcount]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive) {
entry:
  %old = atomicrmw add i32 addrspace(3)* @local_var32, i32 %additive acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: add_i32_varying:
; GFX7LESS-NOT: v_mbcnt_lo_u32_b32
; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
; GFX7LESS-NOT: s_bcnt1_i32_b64
; GFX7LESS: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX8MORE: v_add_u32_dpp
; GFX8MORE: v_add_u32_dpp
; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %old = atomicrmw add i32 addrspace(3)* @local_var32, i32 %lane acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}
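
; In the 64-bit constant cases below, the popcount * 5 product is expected
; to be formed as a pair of 24-bit multiplies (v_mul_u32_u24 for the low
; half, v_mul_hi_u32_u24 for the high half) feeding a single 64-bit DS
; atomic.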

; GCN-LABEL: add_i64_constant:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
; GCN: v_mul_hi_u32_u24{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], s[[popcount]], 5
; GCN: v_mul_u32_u24{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], s[[popcount]], 5
; GCN: ds_add_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
entry:
  %old = atomicrmw add i64 addrspace(3)* @local_var64, i64 5 acq_rel
  store i64 %old, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: add_i64_uniform:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: s_bcnt1_i32_b64 s{{[0-9]+}}, s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
; GCN: ds_add_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}
define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive) {
entry:
  %old = atomicrmw add i64 addrspace(3)* @local_var64, i64 %additive acq_rel
  store i64 %old, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: add_i64_varying:
; GCN-NOT: v_mbcnt_lo_u32_b32
; GCN-NOT: v_mbcnt_hi_u32_b32
; GCN-NOT: s_bcnt1_i32_b64
; GCN: ds_add_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}
define amdgpu_kernel void @add_i64_varying(i64 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %zext = zext i32 %lane to i64
  %old = atomicrmw add i64 addrspace(3)* @local_var64, i64 %zext acq_rel
  store i64 %old, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: sub_i32_constant:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
; GCN: v_mul_u32_u24{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[popcount]], 5
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
entry:
  %old = atomicrmw sub i32 addrspace(3)* @local_var32, i32 5 acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: sub_i32_uniform:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
; GCN: s_mul_i32 s[[scalar_value:[0-9]+]], s{{[0-9]+}}, s[[popcount]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive) {
entry:
  %old = atomicrmw sub i32 addrspace(3)* @local_var32, i32 %subitive acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: sub_i32_varying:
; GFX7LESS-NOT: v_mbcnt_lo_u32_b32
; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
; GFX7LESS-NOT: s_bcnt1_i32_b64
; GFX7LESS: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX8MORE: v_sub{{(rev)?}}_u32_dpp
; GFX8MORE: v_sub{{(rev)?}}_u32_dpp
; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %old = atomicrmw sub i32 addrspace(3)* @local_var32, i32 %lane acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: sub_i64_constant:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
; GCN: v_mul_hi_u32_u24{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], s[[popcount]], 5
; GCN: v_mul_u32_u24{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], s[[popcount]], 5
; GCN: ds_sub_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
entry:
  %old = atomicrmw sub i64 addrspace(3)* @local_var64, i64 5 acq_rel
  store i64 %old, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: sub_i64_uniform:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: s_bcnt1_i32_b64 s{{[0-9]+}}, s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
; GCN: ds_sub_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}
define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive) {
entry:
  %old = atomicrmw sub i64 addrspace(3)* @local_var64, i64 %subitive acq_rel
  store i64 %old, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: sub_i64_varying:
; GCN-NOT: v_mbcnt_lo_u32_b32
; GCN-NOT: v_mbcnt_hi_u32_b32
; GCN-NOT: s_bcnt1_i32_b64
; GCN: ds_sub_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}
define amdgpu_kernel void @sub_i64_varying(i64 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %zext = zext i32 %lane to i64
  %old = atomicrmw sub i64 addrspace(3)* @local_var64, i64 %zext acq_rel
  store i64 %old, i64 addrspace(1)* %out
  ret void
}
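
; For the bitwise and min/max operations below there is no popcount-based
; scaling of a uniform operand (n copies of x combine to x under and/or/
; min/max, and to 0 or x under xor depending on the parity of n), so only
; the divergent i32 cases (a DPP reduction on GFX8+) and the constant i64
; cases are checked.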

; GCN-LABEL: and_i32_varying:
; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %old = atomicrmw and i32 addrspace(3)* @local_var32, i32 %lane acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: or_i32_varying:
; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %old = atomicrmw or i32 addrspace(3)* @local_var32, i32 %lane acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: xor_i32_varying:
; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %old = atomicrmw xor i32 addrspace(3)* @local_var32, i32 %lane acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}
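
; For min and max the DS opcode is expected to encode signedness:
; ds_max_rtn_i32/ds_min_rtn_i32 for the signed cases versus
; ds_max_rtn_u32/ds_min_rtn_u32 for the unsigned ones. In the constant i64
; cases the combined operand is just the constant itself, materialized by
; a pair of v_mov_b32 (low half 5, high half 0) with no s_bcnt1 popcount.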

; GCN-LABEL: max_i32_varying:
; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %old = atomicrmw max i32 addrspace(3)* @local_var32, i32 %lane acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: max_i64_constant:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
; GCN: ds_max_rtn_i64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
entry:
  %old = atomicrmw max i64 addrspace(3)* @local_var64, i64 5 acq_rel
  store i64 %old, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: min_i32_varying:
; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %old = atomicrmw min i32 addrspace(3)* @local_var32, i32 %lane acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: min_i64_constant:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
; GCN: ds_min_rtn_i64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
entry:
  %old = atomicrmw min i64 addrspace(3)* @local_var64, i64 5 acq_rel
  store i64 %old, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: umax_i32_varying:
; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %old = atomicrmw umax i32 addrspace(3)* @local_var32, i32 %lane acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: umax_i64_constant:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
; GCN: ds_max_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
entry:
  %old = atomicrmw umax i64 addrspace(3)* @local_var64, i64 5 acq_rel
  store i64 %old, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: umin_i32_varying:
; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %old = atomicrmw umin i32 addrspace(3)* @local_var32, i32 %lane acq_rel
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: umin_i64_constant:
; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
; GCN: ds_min_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
entry:
  %old = atomicrmw umin i64 addrspace(3)* @local_var64, i64 5 acq_rel
  store i64 %old, i64 addrspace(1)* %out
  ret void
}