; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s

; Tests for indirect addressing on SI, which is implemented using dynamic
; indexing of vectors.
;
; The first two llc invocations share the movrel expectations; the third one
; passes -amdgpu-vgpr-index-mode and uses the index-mode expectations
; (s_set_gpr_idx_on / s_set_gpr_idx_off around a plain v_mov_b32).

; Extract from a constant <4 x float> at index %in + 1. The expectations use
; the register holding element 1 (2.0) as the base and the unmodified loaded
; index, i.e. the constant +1 is folded into the base register.
; GCN-LABEL: {{^}}extract_w_offset:
; GCN-DAG: s_load_dword [[IN:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 [[BASEREG:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0

; MOVREL-DAG: s_mov_b32 m0, [[IN]]
; MOVREL: v_movrels_b32_e32 v{{[0-9]+}}, [[BASEREG]]

; IDXMODE: s_set_gpr_idx_on [[IN]], src0{{$}}
; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, [[BASEREG]]
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %idx = add i32 %in, 1
  %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %idx
  store float %elt, float addrspace(1)* %out
  ret void
}

; Same index computation as above, but the indexed vector is the result of a
; vector `or` on a kernel argument; the expectations list four scalar ors
; followed by four scalar-to-vector register copies.
; XXX: Could do v_or_b32 directly
; GCN-LABEL: {{^}}extract_w_offset_salu_use_vector:
; MOVREL: s_mov_b32 m0
; GCN-DAG: s_or_b32
; GCN-DAG: s_or_b32
; GCN-DAG: s_or_b32
; GCN-DAG: s_or_b32
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}

; MOVREL: v_movrels_b32_e32

; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, src0{{$}}
; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) {
entry:
  %idx = add i32 %in, 1
  %vec = or <4 x i32> %or.val, <i32 1, i32 2, i32 3, i32 4>
  %elt = extractelement <4 x i32> %vec, i32 %idx
  store i32 %elt, i32 addrspace(1)* %out
  ret void
}

; Extract at index %in with no constant offset: the base register is the one
; holding element 0 (1.0).
; GCN-LABEL: {{^}}extract_wo_offset:
; GCN-DAG: s_load_dword [[IN:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; GCN-DAG: v_mov_b32_e32 [[BASEREG:v[0-9]+]], 1.0

; MOVREL-DAG: s_mov_b32 m0, [[IN]]
; MOVREL: v_movrels_b32_e32 v{{[0-9]+}}, [[BASEREG]]

; IDXMODE: s_set_gpr_idx_on [[IN]], src0{{$}}
; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, [[BASEREG]]
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
  store float %elt, float addrspace(1)* %out
  ret void
}

; Extract from a constant vector at index %offset - 512, where %offset is a
; kernel argument.
; GCN-LABEL: {{^}}extract_neg_offset_sgpr:
; The offset depends on the register that holds the first element of the vector.
; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; MOVREL: v_movrels_b32_e32 v{{[0-9]}}, v0

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
; IDXMODE: v_mov_b32_e32 v2, 2
; IDXMODE: v_mov_b32_e32 v3, 3
; IDXMODE-NEXT: s_set_gpr_idx_on [[ADD_IDX]], src0{{$}}
; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; As extract_neg_offset_sgpr, but the indexed vector is computed from two
; vector kernel arguments instead of being a constant.
; GCN-LABEL: {{^}}extract_neg_offset_sgpr_loaded:
; The offset depends on the register that holds the first element of the vector.
; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; MOVREL: v_movrels_b32_e32 v{{[0-9]}}, v0

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
; IDXMODE: v_mov_b32_e32 v0,
; IDXMODE: v_mov_b32_e32 v1,
; IDXMODE: v_mov_b32_e32 v2,
; IDXMODE: v_mov_b32_e32 v3,
; IDXMODE-NEXT: s_set_gpr_idx_on [[ADD_IDX]], src0{{$}}
; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %or = or <4 x i32> %vec0, %vec1
  %value = extractelement <4 x i32> %or, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; The index is derived from the workitem id (id - 512), so the expectations
; contain a v_readfirstlane_b32 / s_cbranch_execnz loop around the indexed move.
; GCN-LABEL: {{^}}extract_neg_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.

; FIXME: The waitcnt for the argument load can go after the loop
; IDXMODE: s_set_gpr_idx_on 0, src0
; GCN: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, exec
; GCN: s_waitcnt lgkmcnt(0)

; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v{{[0-9]+}}

; MOVREL: s_add_i32 m0, [[READLANE]], 0xfffffe0
; MOVREL: s_and_saveexec_b64 vcc, vcc
; MOVREL: v_movrels_b32_e32 [[RESULT:v[0-9]+]], v1

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00
; IDXMODE: s_set_gpr_idx_idx [[ADD_IDX]]
; IDXMODE: s_and_saveexec_b64 vcc, vcc
; IDXMODE: v_mov_b32_e32 [[RESULT:v[0-9]+]], v1

; GCN: s_cbranch_execnz

; IDXMODE: s_set_gpr_idx_off
; GCN: buffer_store_dword [[RESULT]]
define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, -512
  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; extractelement with an undef index. Only the function label is checked, so
; this just verifies that the function compiles (with -verify-machineinstrs).
; GCN-LABEL: {{^}}extract_undef_offset_sgpr:
define void @extract_undef_offset_sgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
entry:
  %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
  %value = extractelement <4 x i32> %ld, i32 undef
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; insertelement with an undef index into a loaded vector. Only the movrel
; output has instruction expectations here.
; GCN-LABEL: {{^}}insert_undef_offset_sgpr_vector_src:
; GCN-DAG: buffer_load_dwordx4
; MOVREL-DAG: s_mov_b32 m0,
; MOVREL: v_movreld_b32
define void @insert_undef_offset_sgpr_vector_src(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %value = insertelement <4 x i32> %ld, i32 5, i32 undef
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; Insert 5.0 (0x40a00000) into a constant vector at index %in + 1; the movrel
; expectations use element 1's register as the destination base.
; GCN-LABEL: {{^}}insert_w_offset:
; GCN-DAG: s_load_dword [[IN:s[0-9]+]]
; MOVREL-DAG: s_mov_b32 m0, [[IN]]
; GCN-DAG: v_mov_b32_e32 v[[ELT0:[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 v[[ELT1:[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 v[[ELT2:[0-9]+]], 0x40400000
; GCN-DAG: v_mov_b32_e32 v[[ELT3:[0-9]+]], 4.0
; GCN-DAG: v_mov_b32_e32 v[[INS:[0-9]+]], 0x40a00000

; MOVREL: v_movreld_b32_e32 v[[ELT1]], v[[INS]]
; MOVREL: buffer_store_dwordx4 v{{\[}}[[ELT0]]:[[ELT3]]{{\]}}
define void @insert_w_offset(<4 x float> addrspace(1)* %out, i32 %in) {
entry:
  %0 = add i32 %in, 1
  %1 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %0
  store <4 x float> %1, <4 x float> addrspace(1)* %out
  ret void
}

; Insert 5.0 into a constant vector at index %in with no constant offset.
; GCN-LABEL: {{^}}insert_wo_offset:
; GCN: s_load_dword [[IN:s[0-9]+]]

; MOVREL: s_mov_b32 m0, [[IN]]
; MOVREL: v_movreld_b32_e32 v[[ELT0:[0-9]+]]

; IDXMODE: s_set_gpr_idx_on [[IN]], dst
; IDXMODE-NEXT: v_mov_b32_e32 v[[ELT0:[0-9]+]], v{{[0-9]+}}
; IDXMODE-NEXT: s_set_gpr_idx_off

; GCN: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
define void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) {
entry:
  %0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
  store <4 x float> %0, <4 x float> addrspace(1)* %out
  ret void
}

; Insert the constant 5 into a constant vector at index %offset - 512, where
; %offset is a kernel argument.
; GCN-LABEL: {{^}}insert_neg_offset_sgpr:
; The offset depends on the register that holds the first element of the vector.
; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; MOVREL: v_movreld_b32_e32 v0, 5

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], dst
; IDXMODE-NEXT: v_mov_b32_e32 v0, 5
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; The vector indexed into is originally loaded into an SGPR rather
; than built with a reg_sequence

; GCN-LABEL: {{^}}insert_neg_offset_sgpr_loadreg:
; The offset depends on the register that holds the first element of the vector.
; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; MOVREL: v_movreld_b32_e32 v0, 5

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], dst
; IDXMODE-NEXT: v_mov_b32_e32 v0, 5
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %value = insertelement <4 x i32> %vec, i32 5, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; Insert at index (workitem id - 512): the expectations contain a
; v_readfirstlane_b32 loop that branches back with s_cbranch_execnz and then
; restores exec from the saved mask.
; GCN-LABEL: {{^}}insert_neg_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.

; GCN-DAG: v_mov_b32_e32 [[VEC_ELT0:v[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT1:v[0-9]+]], 2{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT2:v[0-9]+]], 3{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT3:v[0-9]+]], 4{{$}}

; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
; GCN: s_waitcnt lgkmcnt(0)

; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]:
; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]]

; MOVREL: s_add_i32 m0, [[READLANE]], 0xfffffe00
; MOVREL: s_and_saveexec_b64 vcc, vcc
; MOVREL: v_movreld_b32_e32 [[VEC_ELT0]], 5

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
; IDXMODE: s_set_gpr_idx_idx [[ADD_IDX]]
; IDXMODE: s_and_saveexec_b64 vcc, vcc
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 5

; GCN: s_cbranch_execnz [[LOOPBB]]
; GCN: s_mov_b64 exec, [[SAVEEXEC]]

; IDXMODE: s_set_gpr_idx_off

; GCN: buffer_store_dword
define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, -512
  %value = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 5, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; As insert_neg_offset_vgpr, but with offset -16, which the expectations show
; as a literal -16 operand, and an inserted value of 500 (0x1f4) that is
; materialized in a register.
; GCN-LABEL: {{^}}insert_neg_inline_offset_vgpr:

; GCN-DAG: v_mov_b32_e32 [[VEC_ELT0:v[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT1:v[0-9]+]], 2{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT2:v[0-9]+]], 3{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT3:v[0-9]+]], 4{{$}}
; GCN-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x1f4{{$}}

; IDXMODE: s_set_gpr_idx_on 0, dst

; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
; GCN: s_waitcnt lgkmcnt(0)

; The offset depends on the register that holds the first element of the vector.
; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]]

; MOVREL: s_add_i32 m0, [[READLANE]], -16
; MOVREL: v_movreld_b32_e32 [[VEC_ELT0]], [[VAL]]

; IDXMODE: s_add_i32 [[ADD_IDX:s[0-9]+]], [[READLANE]], -16
; IDXMODE: s_set_gpr_idx_idx [[ADD_IDX]]
; IDXMODE: v_mov_b32_e32 [[VEC_ELT0]], [[VAL]]

; GCN: s_cbranch_execnz

; IDXMODE: s_set_gpr_idx_off
define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, -16
  %value = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 500, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; When the block is split to insert the loop, make sure any other
; places that need to be expanded in the same block are also handled.

; GCN-LABEL: {{^}}extract_vgpr_offset_multiple_in_block:

; FIXME: Why is vector copied in between?

; GCN-DAG: {{buffer|flat}}_load_dword [[IDX0:v[0-9]+]]
; GCN-DAG: s_mov_b32 [[S_ELT1:s[0-9]+]], 9
; GCN-DAG: s_mov_b32 [[S_ELT0:s[0-9]+]], 7
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT0:v[0-9]+]], [[S_ELT0]]
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT1:v[0-9]+]], [[S_ELT1]]

; IDXMODE: s_set_gpr_idx_on 0, src0

; GCN: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec
; GCN: s_waitcnt vmcnt(0)

; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]:
; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]

; MOVREL: s_mov_b32 m0, [[READLANE]]
; MOVREL: s_and_saveexec_b64 vcc, vcc
; MOVREL: v_movrels_b32_e32 [[MOVREL0:v[0-9]+]], [[VEC_ELT0]]

; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
; IDXMODE: s_and_saveexec_b64 vcc, vcc
; IDXMODE: v_mov_b32_e32 [[MOVREL0:v[0-9]+]], [[VEC_ELT0]]

; GCN-NEXT: s_xor_b64 exec, exec, vcc
; GCN-NEXT: s_cbranch_execnz [[LOOP0]]

; FIXME: Redundant copy
; GCN: s_mov_b64 exec, [[MASK]]
; IDXMODE: s_set_gpr_idx_off

; GCN: v_mov_b32_e32 [[VEC_ELT1_2:v[0-9]+]], [[S_ELT1]]

; IDXMODE: s_set_gpr_idx_on 0, src0
; GCN: s_mov_b64 [[MASK2:s\[[0-9]+:[0-9]+\]]], exec

; GCN: [[LOOP1:BB[0-9]+_[0-9]+]]:
; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]

; MOVREL: s_mov_b32 m0, [[READLANE]]
; MOVREL: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: v_movrels_b32_e32 [[MOVREL1:v[0-9]+]], [[VEC_ELT1_2]]

; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
; IDXMODE: s_and_saveexec_b64 vcc, vcc
; IDXMODE-NEXT: v_mov_b32_e32 [[MOVREL1:v[0-9]+]], [[VEC_ELT1_2]]

; GCN-NEXT: s_xor_b64 exec, exec, vcc
; GCN: s_cbranch_execnz [[LOOP1]]

; IDXMODE: s_set_gpr_idx_off

; GCN: buffer_store_dword [[MOVREL0]]
; GCN: buffer_store_dword [[MOVREL1]]
define void @extract_vgpr_offset_multiple_in_block(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %id.ext = zext i32 %id to i64
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %id.ext
  %idx0 = load volatile i32, i32 addrspace(1)* %gep
  %idx1 = add i32 %idx0, 1
  %val0 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx0
  ; The inline asm defines an SGPR between the two extracts; its result is
  ; used in %bb1, so it stays live across the second expansion.
  %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={SGPR4}" ()
  %val1 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx1
  ; NOTE(review): both results are stored through %out0 and %out1 is never
  ; used. This may be a copy/paste slip; left unchanged because the
  ; expectations above were written against this IR - confirm before editing.
  store volatile i32 %val0, i32 addrspace(1)* %out0
  store volatile i32 %val1, i32 addrspace(1)* %out0
  %cmp = icmp eq i32 %id, 0
  br i1 %cmp, label %bb1, label %bb2

bb1:
  store volatile i32 %live.out.reg, i32 addrspace(1)* undef
  br label %bb2

bb2:
  ret void
}

; Two dependent dynamic inserts (indices %idx0 and %idx0 + 1) in one block,
; with a value defined by inline asm that is also used in a later block.
; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:
; GCN-DAG: s_load_dwordx4 s{{\[}}[[S_ELT0:[0-9]+]]:[[S_ELT3:[0-9]+]]{{\]}}
; GCN-DAG: {{buffer|flat}}_load_dword [[IDX0:v[0-9]+]]
; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62

; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT3:[0-9]+]], s[[S_ELT3]]
; GCN: v_mov_b32_e32 v[[VEC_ELT2:[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 v[[VEC_ELT1:[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]

; IDXMODE: s_set_gpr_idx_on 0, dst

; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]:
; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]

; MOVREL: s_mov_b32 m0, [[READLANE]]
; MOVREL: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: v_movreld_b32_e32 v[[VEC_ELT0]], [[INS0]]

; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
; IDXMODE: s_and_saveexec_b64 vcc, vcc
; IDXMODE-NEXT: v_mov_b32_e32 v[[VEC_ELT0]], [[INS0]]

; GCN-NEXT: s_xor_b64 exec, exec, vcc
; GCN: s_cbranch_execnz [[LOOP0]]

; FIXME: Redundant copy
; GCN: s_mov_b64 exec, [[MASK:s\[[0-9]+:[0-9]+\]]]
; IDXMODE: s_set_gpr_idx_off

; IDXMODE: s_set_gpr_idx_on 0, dst
; GCN: s_mov_b64 [[MASK]], exec

; GCN: [[LOOP1:BB[0-9]+_[0-9]+]]:
; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]

; MOVREL: s_mov_b32 m0, [[READLANE]]
; MOVREL: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: v_movreld_b32_e32 v[[VEC_ELT1]], 63

; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
; IDXMODE: s_and_saveexec_b64 vcc, vcc
; IDXMODE-NEXT: v_mov_b32_e32 v[[VEC_ELT1]], 63

; GCN-NEXT: s_xor_b64 exec, exec, vcc
; GCN: s_cbranch_execnz [[LOOP1]]

; GCN: buffer_store_dwordx4 v{{\[}}[[VEC_ELT0]]:

; GCN: buffer_store_dword [[INS0]]
define void @insert_vgpr_offset_multiple_in_block(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, i32 addrspace(1)* %in, <4 x i32> %vec0) #0 {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %id.ext = zext i32 %id to i64
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %id.ext
  %idx0 = load volatile i32, i32 addrspace(1)* %gep
  %idx1 = add i32 %idx0, 1
  %live.out.val = call i32 asm sideeffect "v_mov_b32 $0, 62", "=v"()
  %vec1 = insertelement <4 x i32> %vec0, i32 %live.out.val, i32 %idx0
  %vec2 = insertelement <4 x i32> %vec1, i32 63, i32 %idx1
  store volatile <4 x i32> %vec2, <4 x i32> addrspace(1)* %out0
  %cmp = icmp eq i32 %id, 0
  br i1 %cmp, label %bb1, label %bb2

bb1:
  store volatile i32 %live.out.val, i32 addrspace(1)* undef
  br label %bb2

bb2:
  ret void
}

; A dynamic extract in each arm of a branch; both arms are expected to get
; their own index setup and indexed move.
; GCN-LABEL: {{^}}extract_adjacent_blocks:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: s_cmp_lg_u32
; GCN: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]

; GCN: buffer_load_dwordx4
; MOVREL: s_mov_b32 m0,
; MOVREL: v_movrels_b32_e32

; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, src0
; IDXMODE: v_mov_b32_e32
; IDXMODE: s_set_gpr_idx_off

; GCN: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]

; GCN: [[BB4]]:
; GCN: buffer_load_dwordx4
; MOVREL: s_mov_b32 m0,
; MOVREL: v_movrels_b32_e32

; IDXMODE: s_set_gpr_idx_on
; IDXMODE: v_mov_b32_e32
; IDXMODE: s_set_gpr_idx_off

; GCN: [[ENDBB]]:
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @extract_adjacent_blocks(i32 %arg) #0 {
bb:
  %tmp = icmp eq i32 %arg, 0
  br i1 %tmp, label %bb1, label %bb4

bb1:
  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp3 = extractelement <4 x float> %tmp2, i32 undef
  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
  br label %bb7

bb4:
  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp6 = extractelement <4 x float> %tmp5, i32 undef
  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
  br label %bb7

bb7:
  %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
  store volatile float %tmp8, float addrspace(1)* undef
  ret void
}

; Insert counterpart of extract_adjacent_blocks: a dynamic insert in each arm
; of a branch.
; GCN-LABEL: {{^}}insert_adjacent_blocks:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: s_cmp_lg_u32
; GCN: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]

; GCN: buffer_load_dwordx4
; MOVREL: s_mov_b32 m0,
; MOVREL: v_movreld_b32_e32

; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, dst
; IDXMODE: v_mov_b32_e32
; IDXMODE: s_set_gpr_idx_off

; GCN: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]

; GCN: [[BB4]]:
; GCN: buffer_load_dwordx4
; MOVREL: s_mov_b32 m0,
; MOVREL: v_movreld_b32_e32

; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, dst
; IDXMODE: v_mov_b32_e32
; IDXMODE: s_set_gpr_idx_off

; GCN: [[ENDBB]]:
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @insert_adjacent_blocks(i32 %arg, float %val0) #0 {
bb:
  %tmp = icmp eq i32 %arg, 0
  br i1 %tmp, label %bb1, label %bb4

bb1:                                              ; preds = %bb
  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp3 = insertelement <4 x float> %tmp2, float %val0, i32 undef
  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp3) #0 ; Prevent block optimize out
  br label %bb7

bb4:                                              ; preds = %bb
  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp6 = insertelement <4 x float> %tmp5, float %val0, i32 undef
  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp6) #0 ; Prevent block optimize out
  br label %bb7

bb7:                                              ; preds = %bb4, %bb1
  %tmp8 = phi <4 x float> [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
  store volatile <4 x float> %tmp8, <4 x float> addrspace(1)* undef
  ret void
}

; FIXME: Should be able to fold zero input to movreld to inline imm?
; multi_same_block: two dynamic inserts into different constant <6 x float>
; vectors in one block, both indexed by %arg - 16. After the first movreld the
; movrel expectations forbid any further mention of m0 until the next
; positive match.

; GCN-LABEL: {{^}}multi_same_block:

; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT0:[0-9]+]], 0x41880000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT2:[0-9]+]], 0x41980000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
; GCN-DAG: s_load_dword [[ARG:s[0-9]+]]
; IDXMODE-DAG: s_add_i32 [[ARG_ADD:s[0-9]+]], [[ARG]], -16

; MOVREL-DAG: s_add_i32 m0, [[ARG]], -16
; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT0]], 4.0
; GCN-NOT: m0

; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT0]], 4.0
; IDXMODE: s_set_gpr_idx_off

; GCN: v_mov_b32_e32 v[[VEC0_ELT2]], 0x4188cccd
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4190cccd
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4198cccd
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a0cccd
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a8cccd
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd

; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT2]], -4.0

; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT2]], -4.0
; IDXMODE: s_set_gpr_idx_off

; GCN: s_mov_b32 m0, -1
; GCN: ds_write_b32
; GCN: ds_write_b32
; GCN: s_endpgm
define void @multi_same_block(i32 %arg) #0 {
bb:
  %tmp1 = add i32 %arg, -16
  %tmp2 = insertelement <6 x float> <float 1.700000e+01, float 1.800000e+01, float 1.900000e+01, float 2.000000e+01, float 2.100000e+01, float 2.200000e+01>, float 4.000000e+00, i32 %tmp1
  %tmp3 = add i32 %arg, -16
  %tmp4 = insertelement <6 x float> <float 0x40311999A0000000, float 0x40321999A0000000, float 0x40331999A0000000, float 0x40341999A0000000, float 0x40351999A0000000, float 0x40361999A0000000>, float -4.0, i32 %tmp3
  %tmp5 = bitcast <6 x float> %tmp2 to <6 x i32>
  %tmp6 = extractelement <6 x i32> %tmp5, i32 1
  %tmp7 = bitcast <6 x float> %tmp4 to <6 x i32>
  %tmp8 = extractelement <6 x i32> %tmp7, i32 5
  store volatile i32 %tmp6, i32 addrspace(3)* undef, align 4
  store volatile i32 %tmp8, i32 addrspace(3)* undef, align 4
  ret void
}

; offset puts outside of superregister boundaries, so clamp to 1st element.
; NOTE(review): the remark above appears to describe
; extract_out_of_bounds_offset further down (offset 4, base = lowest element)
; rather than this test (offset 3, base = highest element) - confirm.
; GCN-LABEL: {{^}}extract_largest_inbounds_offset:
; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]{{\]}}
; GCN-DAG: s_load_dword [[IDX:s[0-9]+]]
; MOVREL: s_mov_b32 m0, [[IDX]]
; MOVREL: v_movrels_b32_e32 [[EXTRACT:v[0-9]+]], v[[HI_ELT]]

; IDXMODE: s_set_gpr_idx_on [[IDX]], src0
; IDXMODE: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], v[[HI_ELT]]
; IDXMODE: s_set_gpr_idx_off

; GCN: buffer_store_dword [[EXTRACT]]
define void @extract_largest_inbounds_offset(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
entry:
  %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
  %offset = add i32 %idx, 3
  %value = extractelement <4 x i32> %ld, i32 %offset
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Constant offset 4 is one past the last element of the <4 x i32>: the
; expectations keep the +4 in the index computation and use the lowest
; element's register as the base.
; GCN-LABEL: {{^}}extract_out_of_bounds_offset:
; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]{{\]}}
; GCN-DAG: s_load_dword [[IDX:s[0-9]+]]
; MOVREL: s_add_i32 m0, [[IDX]], 4
; MOVREL: v_movrels_b32_e32 [[EXTRACT:v[0-9]+]], v[[LO_ELT]]

; IDXMODE: s_add_i32 [[ADD_IDX:s[0-9]+]], [[IDX]], 4
; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], src0
; IDXMODE: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], v[[LO_ELT]]
; IDXMODE: s_set_gpr_idx_off

; GCN: buffer_store_dword [[EXTRACT]]
define void @extract_out_of_bounds_offset(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
entry:
  %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
  %offset = add i32 %idx, 4
  %value = extractelement <4 x i32> %ld, i32 %offset
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Test that the or is folded into the base address register instead of
; added to m0

; GCN-LABEL: {{^}}extractelement_v4i32_or_index:
; GCN: s_load_dword [[IDX_IN:s[0-9]+]]
; GCN: s_lshl_b32 [[IDX_SHL:s[0-9]+]], [[IDX_IN]]
; GCN-NOT: [[IDX_SHL]]

; MOVREL: s_mov_b32 m0, [[IDX_SHL]]
; MOVREL: v_movrels_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}

; IDXMODE: s_set_gpr_idx_on [[IDX_SHL]], src0
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; IDXMODE: s_set_gpr_idx_off
define void @extractelement_v4i32_or_index(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx.in) {
entry:
  %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
  %idx.shl = shl i32 %idx.in, 2
  %idx = or i32 %idx.shl, 1
  %value = extractelement <4 x i32> %ld, i32 %idx
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Insert counterpart of extractelement_v4i32_or_index: the index is
; (%idx.in << 2) | 1 and the shifted value is expected to be used directly as
; the dynamic index.
; GCN-LABEL: {{^}}insertelement_v4f32_or_index:
; GCN: s_load_dword [[IDX_IN:s[0-9]+]]
; GCN: s_lshl_b32 [[IDX_SHL:s[0-9]+]], [[IDX_IN]]
; GCN-NOT: [[IDX_SHL]]

; MOVREL: s_mov_b32 m0, [[IDX_SHL]]
; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}

; IDXMODE: s_set_gpr_idx_on [[IDX_SHL]], dst
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; IDXMODE: s_set_gpr_idx_off
define void @insertelement_v4f32_or_index(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %idx.in) nounwind {
  %idx.shl = shl i32 %idx.in, 2
  %idx = or i32 %idx.shl, 1
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %idx
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; A loop whose phi is fed by an element extracted from a vector built with
; dynamic inserts at an index loaded from memory; the expectations cover the
; phi register initialisation (8) and the indexed-insert loop.
; GCN-LABEL: {{^}}broken_phi_bb:
; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8

; GCN: s_branch [[BB2:BB[0-9]+_[0-9]+]]

; GCN: {{^BB[0-9]+_[0-9]+}}:
; GCN: s_mov_b64 exec,
; IDXMODE: s_set_gpr_idx_off

; GCN: [[BB2]]:
; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]]
; GCN: buffer_load_dword

; GCN: [[REGLOOP:BB[0-9]+_[0-9]+]]:
; MOVREL: v_movreld_b32_e32

; IDXMODE: s_set_gpr_idx_idx
; IDXMODE: v_mov_b32_e32
; GCN: s_cbranch_execnz [[REGLOOP]]
define void @broken_phi_bb(i32 %arg, i32 %arg1) #0 {
bb:
  br label %bb2

bb2:                                              ; preds = %bb4, %bb
  %tmp = phi i32 [ 8, %bb ], [ %tmp7, %bb4 ]
  %tmp3 = icmp slt i32 %tmp, %arg
  br i1 %tmp3, label %bb4, label %bb8

bb4:                                              ; preds = %bb2
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  %tmp5 = insertelement <8 x i32> undef, i32 undef, i32 %vgpr
  %tmp6 = insertelement <8 x i32> %tmp5, i32 %arg1, i32 %vgpr
  %tmp7 = extractelement <8 x i32> %tmp6, i32 0
  br label %bb2

bb8:                                              ; preds = %bb2
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.amdgcn.s.barrier() #2

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind convergent }