1; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
; Regression test: replacing i64 stores with v2i32 stores used to break
; other users of the bitcast if they already existed.
6
; The i64 argument %val is used two ways: through a bitcast to <2 x i32>,
; whose two elements are extracted and stored individually, and directly as
; an i64 store. All three stores are volatile. The checks expect two dword
; stores followed by one dwordx2 store.
; GCN-LABEL: {{^}}extract_vector_elt_select_error:
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @extract_vector_elt_select_error(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %val) #0 {
  %vec = bitcast i64 %val to <2 x i32>
  %elt0 = extractelement <2 x i32> %vec, i32 0
  %elt1 = extractelement <2 x i32> %vec, i32 1

  ; Both extracted elements are written to the same address, %out.
  store volatile i32 %elt0, i32 addrspace(1)* %out
  store volatile i32 %elt1, i32 addrspace(1)* %out
  ; The un-bitcast i64 value is stored as a whole to %in.
  store volatile i64 %val, i64 addrspace(1)* %in
  ret void
}
21
; Constant-index extracts from a <2 x i64> kernel argument. The elements are
; stored swapped: element 1 goes to %out and element 0 goes to %out1, which
; is %out advanced by one i64.
; NOTE(review): no instruction checks follow the label below, so only the
; presence of the function label in the output is verified here.
; GCN-LABEL: {{^}}extract_vector_elt_v2i64:
define amdgpu_kernel void @extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo) #0 {
  %p0 = extractelement <2 x i64> %foo, i32 0
  %p1 = extractelement <2 x i64> %foo, i32 1
  %out1 = getelementptr i64, i64 addrspace(1)* %out, i32 1
  store volatile i64 %p1, i64 addrspace(1)* %out
  store volatile i64 %p0, i64 addrspace(1)* %out1
  ret void
}
31
; Extract with a runtime index (%elt) from a <2 x i64> kernel argument.
; The checks expect no buffer_load, a scalar compare of the index against 1
; that feeds a 64-bit select mask, two v_cndmask_b32 instructions using that
; mask, and finally a dwordx2 store of the selected element.
; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64:
; GCN-NOT: buffer_load
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @dyn_extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo, i32 %elt) #0 {
  %dynelt = extractelement <2 x i64> %foo, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}
44
; Same runtime-index extract as the kernel-argument case, but the vector is
; first loaded (volatile) through %foo and OR'd with the %arst argument.
; The checks expect one buffer_load_dwordx4 with no further buffer_load
; after it, then the compare-against-1 / select-mask / two v_cndmask_b32
; sequence, and a dwordx2 store of the result.
; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64_2:
; GCN:     buffer_load_dwordx4
; GCN-NOT: buffer_load
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @dyn_extract_vector_elt_v2i64_2(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %foo, i32 %elt, <2 x i64> %arst) #0 {
  %load = load volatile <2 x i64>, <2 x i64> addrspace(1)* %foo
  %or = or <2 x i64> %load, %arst
  %dynelt = extractelement <2 x i64> %or, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}
60
; Extract with a runtime index (%elt) from a <3 x i64> kernel argument.
; The checks expect no buffer_load, scalar compares of the index against 1
; and 2 that each feed a select mask (C1, C2), four v_cndmask_b32
; instructions (two per mask), and a dwordx2 store of the selected element.
; GCN-LABEL: {{^}}dyn_extract_vector_elt_v3i64:
; GCN-NOT: buffer_load
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(i64 addrspace(1)* %out, <3 x i64> %foo, i32 %elt) #0 {
  %dynelt = extractelement <3 x i64> %foo, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}
77
; Extract with a runtime index (%elt) from a <4 x i64> kernel argument.
; The checks expect no buffer_load, scalar compares of the index against 1,
; 2 and 3 that each feed a select mask (C1, C2, C3), six v_cndmask_b32
; instructions (two per mask), and a dwordx2 store of the selected element.
; GCN-LABEL: {{^}}dyn_extract_vector_elt_v4i64:
; GCN-NOT: buffer_load
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @dyn_extract_vector_elt_v4i64(i64 addrspace(1)* %out, <4 x i64> %foo, i32 %elt) #0 {
  %dynelt = extractelement <4 x i64> %foo, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}
98
; Attribute group #0, referenced by the kernel definitions above.
attributes #0 = { nounwind }
100