; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=VI %s

; AMDGPU intrinsic used by every test below as the per-work-item element index,
; so that each lane reads and writes its own element of the input/output buffers.
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

; Signed i16 max written as `icmp sge` + `select`.
; Each work-item loads aptr[tid] and bptr[tid], keeps the larger value and
; stores it to out[tid]. The compare/select pair is expected to be selected as
; a single v_max_i16_e32 on the Fiji target.
; FIXME: Need to handle non-uniform case for function below (load without gep).
; NOTE(review): the loads below already go through a tid-indexed getelementptr,
; so this FIXME may be stale -- confirm before removing it.
; GCN-LABEL: {{^}}v_test_imax_sge_i16:
; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_test_imax_sge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
  ; Consecutive i16 elements are 2 bytes apart, so align 2 is the strongest
  ; alignment that holds for every tid; a larger value would be overstated.
  %a = load i16, i16 addrspace(1)* %gep0, align 2
  %b = load i16, i16 addrspace(1)* %gep1, align 2
  %cmp = icmp sge i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %outgep, align 2
  ret void
}

; Signed max of two <4 x i16> vectors written as `icmp sge` + `select`.
; Each work-item loads aptr[tid] and bptr[tid], selects the larger value per
; element and stores the result vector to out[tid]. Four v_max_i16_e32
; instructions are expected, one per vector element.
; FIXME: Need to handle non-uniform case for function below (load without gep).
; NOTE(review): the loads below already go through a tid-indexed getelementptr,
; so this FIXME may be stale -- confirm before removing it.
; GCN-LABEL: {{^}}v_test_imax_sge_v4i16:
; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_test_imax_sge_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %aptr, <4 x i16> addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep0 = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
  ; Consecutive <4 x i16> elements are 8 bytes apart, so the stated align 4
  ; holds for every tid provided the base pointers are at least 4-byte aligned.
  %a = load <4 x i16>, <4 x i16> addrspace(1)* %gep0, align 4
  %b = load <4 x i16>, <4 x i16> addrspace(1)* %gep1, align 4
  %cmp = icmp sge <4 x i16> %a, %b
  %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
  store <4 x i16> %val, <4 x i16> addrspace(1)* %outgep, align 4
  ret void
}

; Signed i16 max written as `icmp sgt` + `select` (strict comparison variant).
; Each work-item loads aptr[tid] and bptr[tid], keeps the larger value and
; stores it to out[tid]. The compare/select pair is expected to be selected as
; v_max_i16_e32 on the Fiji target.
; FIXME: Need to handle non-uniform case for function below (load without gep).
; NOTE(review): the loads below already go through a tid-indexed getelementptr,
; so this FIXME may be stale -- confirm before removing it.
; GCN-LABEL: {{^}}v_test_imax_sgt_i16:
; VI: v_max_i16_e32
define void @v_test_imax_sgt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
  ; Consecutive i16 elements are 2 bytes apart, so align 2 is the strongest
  ; alignment that holds for every tid; a larger value would be overstated.
  %a = load i16, i16 addrspace(1)* %gep0, align 2
  %b = load i16, i16 addrspace(1)* %gep1, align 2
  %cmp = icmp sgt i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %outgep, align 2
  ret void
}

; Unsigned i16 max written as `icmp uge` + `select`.
; Each work-item loads aptr[tid] and bptr[tid], keeps the larger value under
; unsigned comparison and stores it to out[tid]. The compare/select pair is
; expected to be selected as v_max_u16_e32 on the Fiji target.
; FIXME: Need to handle non-uniform case for function below (load without gep).
; NOTE(review): the loads below already go through a tid-indexed getelementptr,
; so this FIXME may be stale -- confirm before removing it.
; GCN-LABEL: {{^}}v_test_umax_uge_i16:
; VI: v_max_u16_e32
define void @v_test_umax_uge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
  ; Consecutive i16 elements are 2 bytes apart, so align 2 is the strongest
  ; alignment that holds for every tid; a larger value would be overstated.
  %a = load i16, i16 addrspace(1)* %gep0, align 2
  %b = load i16, i16 addrspace(1)* %gep1, align 2
  %cmp = icmp uge i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %outgep, align 2
  ret void
}

; Unsigned i16 max written as `icmp ugt` + `select` (strict comparison variant).
; Each work-item loads aptr[tid] and bptr[tid], keeps the larger value under
; unsigned comparison and stores it to out[tid]. The compare/select pair is
; expected to be selected as v_max_u16_e32 on the Fiji target.
; FIXME: Need to handle non-uniform case for function below (load without gep).
; NOTE(review): the loads below already go through a tid-indexed getelementptr,
; so this FIXME may be stale -- confirm before removing it.
; GCN-LABEL: {{^}}v_test_umax_ugt_i16:
; VI: v_max_u16_e32
define void @v_test_umax_ugt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
  ; Consecutive i16 elements are 2 bytes apart, so align 2 is the strongest
  ; alignment that holds for every tid; a larger value would be overstated.
  %a = load i16, i16 addrspace(1)* %gep0, align 2
  %b = load i16, i16 addrspace(1)* %gep1, align 2
  %cmp = icmp ugt i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %outgep, align 2
  ret void
}
