1; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 \
2; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s
3
4; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr9 \
5; RUN:   -verify-machineinstrs -vec-extabi | \
6; RUN:   FileCheck %s --check-prefixes=AIX,AIX64
7; RUN: llc < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 \
8; RUN:   -verify-machineinstrs  -vec-extabi | \
9; RUN:   FileCheck %s --check-prefixes=AIX,AIX32
10
; Byte-swaps every i32 element of %Arr in place for indices [0, %Len).
; The body contains a vector loop (vector.body) that handles four elements per
; iteration through @llvm.bswap.v4i32, and a scalar loop (for.body/for.inc)
; that handles the remaining elements through @llvm.bswap.i32.
;
; The checks below require an lxv load whose destination register is later
; used as the source operand of xxbrw. The default-prefix run and the 64-bit
; AIX run additionally require that this register is not mentioned between the
; two instructions; the 32-bit AIX run has no such negative check.
11define dso_local void @test(i32* %Arr, i32 signext %Len) {
12; CHECK-LABEL: test:
13; CHECK:         lxv [[REG:vs[0-9]+]], 0(r{{[0-9]+}})
14; CHECK-NOT:     [[REG]]
15; CHECK:         xxbrw vs{{[0-9]+}}, [[REG]]
16
17; AIX-LABEL:     test:
18; AIX64:         lxv [[REG64:[0-9]+]], {{[0-9]+}}({{[0-9]+}})
19; AIX32:         lxv [[REG32:[0-9]+]], {{[0-9]+}}({{[0-9]+}})
20; AIX64-NOT:     [[REG64]]
21; AIX64:         xxbrw {{[0-9]+}}, [[REG64]]
22; AIX32:         xxbrw {{[0-9]+}}, [[REG32]]
23entry:
  ; Nothing to do unless 0 < %Len (signed compare).
24  %cmp1 = icmp slt i32 0, %Len
25  br i1 %cmp1, label %for.body.lr.ph, label %for.cond.cleanup
26
27for.body.lr.ph:                                   ; preds = %entry
  ; With fewer than 4 elements (unsigned compare) go straight to the scalar
  ; loop.
28  %min.iters.check = icmp ult i32 %Len, 4
29  br i1 %min.iters.check, label %scalar.ph, label %vector.ph
30
31vector.ph:                                        ; preds = %for.body.lr.ph
  ; %n.vec = %Len - (%Len urem 4): the element count covered by the vector
  ; loop, used as its exit bound.
32  %n.mod.vf = urem i32 %Len, 4
33  %n.vec = sub i32 %Len, %n.mod.vf
34  br label %vector.body
35
36vector.body:                                      ; preds = %vector.body, %vector.ph
37  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; %broadcast.splatinsert -> %broadcast.splat -> %induction is a chain whose
  ; final value (%induction) is not used by any other instruction in this
  ; function.
38  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
39  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
40  %induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
  ; %0 is %index + 0, i.e. the same value as %index.
41  %0 = add i32 %index, 0
  ; Load four i32 values starting at %Arr[%index] (align 4) and byte-swap each
  ; lane.
42  %1 = sext i32 %0 to i64
43  %2 = getelementptr inbounds i32, i32* %Arr, i64 %1
44  %3 = getelementptr inbounds i32, i32* %2, i32 0
45  %4 = bitcast i32* %3 to <4 x i32>*
46  %wide.load = load <4 x i32>, <4 x i32>* %4, align 4
47  %5 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %wide.load)
  ; Recompute the same address and store the swapped vector back (align 4).
48  %6 = sext i32 %0 to i64
49  %7 = getelementptr inbounds i32, i32* %Arr, i64 %6
50  %8 = getelementptr inbounds i32, i32* %7, i32 0
51  %9 = bitcast i32* %8 to <4 x i32>*
52  store <4 x i32> %5, <4 x i32>* %9, align 4
  ; Advance by four elements; leave the loop once %index.next equals %n.vec.
53  %index.next = add i32 %index, 4
54  %10 = icmp eq i32 %index.next, %n.vec
55  br i1 %10, label %middle.block, label %vector.body
56
57middle.block:                                     ; preds = %vector.body
  ; If the vector loop covered all %Len elements, skip the scalar loop.
58  %cmp.n = icmp eq i32 %Len, %n.vec
59  br i1 %cmp.n, label %for.cond.for.cond.cleanup_crit_edge, label %scalar.ph
60
61scalar.ph:                                        ; preds = %middle.block, %for.body.lr.ph
  ; Scalar loop start index: %n.vec after the vector loop, or 0 when the
  ; vector loop was skipped.
62  %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %for.body.lr.ph ]
63  br label %for.body
64
65for.cond.for.cond.cleanup_crit_edge:              ; preds = %middle.block, %for.inc
66  br label %for.cond.cleanup
67
68for.cond.cleanup:                                 ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
69  br label %for.end
70
71for.body:                                         ; preds = %for.inc, %scalar.ph
72  %i.02 = phi i32 [ %bc.resume.val, %scalar.ph ], [ %inc, %for.inc ]
  ; Byte-swap the single element %Arr[%i.02] in place.
73  %idxprom = sext i32 %i.02 to i64
74  %arrayidx = getelementptr inbounds i32, i32* %Arr, i64 %idxprom
75  %11 = load i32, i32* %arrayidx, align 4
76  %12 = call i32 @llvm.bswap.i32(i32 %11)
77  %idxprom1 = sext i32 %i.02 to i64
78  %arrayidx2 = getelementptr inbounds i32, i32* %Arr, i64 %idxprom1
79  store i32 %12, i32* %arrayidx2, align 4
80  br label %for.inc
81
82for.inc:                                          ; preds = %for.body
  ; Next element; keep looping while %inc < %Len (signed compare).
83  %inc = add nsw i32 %i.02, 1
84  %cmp = icmp slt i32 %inc, %Len
85  br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
86
87for.end:                                          ; preds = %for.cond.cleanup
88  ret void
89}
90
; Byte-swaps each i16 lane of a <8 x i16> argument with @llvm.bswap.v8i16 and
; returns the result.
; Every run expects one xxbrh whose source and destination are both register
; 34 (printed as vs34 in the run that passes -ppc-asm-full-reg-names),
; immediately followed by blr.
91define dso_local <8 x i16> @test_halfword(<8 x i16> %a) local_unnamed_addr {
92; CHECK-LABEL: test_halfword:
93; CHECK:       xxbrh vs34, vs34
94; CHECK-NEXT:  blr
95
96; AIX-LABEL:   test_halfword:
97; AIX:         xxbrh 34, 34
98; AIX-NEXT:    blr
99entry:
100  %0 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
101  ret <8 x i16> %0
102}
103
; Byte-swaps each i64 lane of a <2 x i64> argument with @llvm.bswap.v2i64 and
; returns the result.
; Every run expects one xxbrd whose source and destination are both register
; 34 (printed as vs34 in the run that passes -ppc-asm-full-reg-names),
; immediately followed by blr.
104define dso_local <2 x i64> @test_doubleword(<2 x i64> %a) local_unnamed_addr {
105; CHECK-LABEL: test_doubleword:
106; CHECK:       xxbrd vs34, vs34
107; CHECK-NEXT:  blr
108
109; AIX-LABEL:   test_doubleword:
110; AIX:         xxbrd 34, 34
111; AIX-NEXT:    blr
112entry:
113  %0 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
114  ret <2 x i64> %0
115}
116
; Byte-swaps the single i128 lane of a <1 x i128> argument with
; @llvm.bswap.v1i128 and returns the result.
; Every run expects one xxbrq whose source and destination are both register
; 34 (printed as vs34 in the run that passes -ppc-asm-full-reg-names),
; immediately followed by blr.
117define dso_local <1 x i128> @test_quadword(<1 x i128> %a) local_unnamed_addr {
118; CHECK-LABEL: test_quadword:
119; CHECK:       xxbrq vs34, vs34
120; CHECK-NEXT:  blr
121
122; AIX-LABEL:   test_quadword:
123; AIX:         xxbrq 34, 34
124; AIX-NEXT:    blr
125entry:
126  %0 = call <1 x i128> @llvm.bswap.v1i128(<1 x i128> %a)
127  ret <1 x i128> %0
128}
129
; Declarations of the llvm.bswap intrinsic overloads called by the test
; functions in this file: one scalar form (i32) and four vector forms
; (<1 x i128>, <2 x i64>, <8 x i16>, <4 x i32>).
130; Function Attrs: nounwind readnone speculatable willreturn
131declare <1 x i128> @llvm.bswap.v1i128(<1 x i128>)
132
133; Function Attrs: nounwind readnone speculatable willreturn
134declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
135
136; Function Attrs: nounwind readnone speculatable willreturn
137declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
138
139; Function Attrs: nounwind readnone speculatable willreturn
140declare i32 @llvm.bswap.i32(i32)
141
142; Function Attrs: nounwind readnone speculatable willreturn
143declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
144