1*35f70200SMichael Kruse; RUN: opt %loadPolly -polly-import-jscop \
2*35f70200SMichael Kruse; RUN:     -polly-codegen -S < %s | FileCheck %s
3*35f70200SMichael Kruse; RUN: opt %loadPolly -polly-import-jscop \
4*35f70200SMichael Kruse; RUN:     -polly-codegen -polly-import-jscop-postfix=pow2 \
5*35f70200SMichael Kruse; RUN:     -S < %s | FileCheck %s -check-prefix=POW2
6*35f70200SMichael Kruse;
7*35f70200SMichael Kruse;    void exprModDiv(float *A, float *B, float *C, long N, long p) {
8*35f70200SMichael Kruse;      for (long i = 0; i < N; i++)
9*35f70200SMichael Kruse;        C[i] += A[i] + B[i] + A[i] + B[i + p];
10*35f70200SMichael Kruse;    }
11*35f70200SMichael Kruse;
12*35f70200SMichael Kruse;
13*35f70200SMichael Kruse; This test case changes the access functions such that the resulting index
14*35f70200SMichael Kruse; expressions are modulo or division operations. We test that the code we
15*35f70200SMichael Kruse; generate takes advantage of knowledge about unsigned numerators. This is
16*35f70200SMichael Kruse; useful as LLVM will translate urem and udiv operations with power-of-two
17*35f70200SMichael Kruse; denominators to fast bitwise AND or shift operations.
18*35f70200SMichael Kruse
19*35f70200SMichael Kruse; A[i % 127]
20*35f70200SMichael Kruse; CHECK:  %pexp.pdiv_r = urem i64 %polly.indvar, 127
21*35f70200SMichael Kruse; CHECK:  %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r
22*35f70200SMichael Kruse
23*35f70200SMichael Kruse; B[floor(i / 127)]
24*35f70200SMichael Kruse;
25*35f70200SMichael Kruse; Note: without the floor, we would create a map i -> i/127, which only contains
26*35f70200SMichael Kruse;       values of i that are divisible by 127. All other values of i would not
27*35f70200SMichael Kruse;       be mapped to any value. However, to generate correct code we require
28*35f70200SMichael Kruse;       each value of i to indeed be mapped to a value.
29*35f70200SMichael Kruse;
30*35f70200SMichael Kruse; CHECK:  %pexp.p_div_q = udiv i64 %polly.indvar, 127
31*35f70200SMichael Kruse; CHECK:  %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q
32*35f70200SMichael Kruse
33*35f70200SMichael Kruse; A[p % 128]
34*35f70200SMichael Kruse; CHECK:  %polly.access.A11 = getelementptr float, float* %A, i64 0
35*35f70200SMichael Kruse
36*35f70200SMichael Kruse; B[p / 127]
37*35f70200SMichael Kruse; CHECK:  %pexp.div = sdiv exact i64 %p, 127
38*35f70200SMichael Kruse; CHECK:  %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div
39*35f70200SMichael Kruse
40*35f70200SMichael Kruse; A[i % 128]
41*35f70200SMichael Kruse; POW2:  %pexp.pdiv_r = urem i64 %polly.indvar, 128
42*35f70200SMichael Kruse; POW2:  %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r
43*35f70200SMichael Kruse
44*35f70200SMichael Kruse; B[floor(i / 128)]
45*35f70200SMichael Kruse; POW2:  %pexp.p_div_q = udiv i64 %polly.indvar, 128
46*35f70200SMichael Kruse; POW2:  %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q
47*35f70200SMichael Kruse
48*35f70200SMichael Kruse; A[p % 128]
49*35f70200SMichael Kruse; POW2:  %polly.access.A11 = getelementptr float, float* %A, i64 0
50*35f70200SMichael Kruse
51*35f70200SMichael Kruse; B[p / 128]
52*35f70200SMichael Kruse; POW2:  %pexp.div = sdiv exact i64 %p, 128
53*35f70200SMichael Kruse; POW2:  %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div
54*35f70200SMichael Kruse
; Data layout used by this test: little-endian ("e"), ELF name mangling
; ("m:e"), i64 aligned to 64 bits, x86 f80 aligned to 128 bits, native integer
; widths of 8/16/32/64 bits, and a 128-bit natural stack alignment ("S128").
55*35f70200SMichael Krusetarget datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
56*35f70200SMichael Kruse
; Input kernel for the test, equivalent to:
;
;   for (long i = 0; i < N; i++)
;     C[i] += A[i] + B[i] + A[i] + B[i + p];
;
; The header comment of this file states that the imported JSCoP changes the
; access functions; the CHECK/POW2 lines cover four accesses (A, B, A, B),
; which matches the four loads from %A and %B in for.body. The FileCheck
; patterns refer to %A, %B and %p by name, so these names must not change.
57*35f70200SMichael Krusedefine void @exprModDiv(float* %A, float* %B, float* %C, i64 %N, i64 %p) {
58*35f70200SMichael Kruseentry:
59*35f70200SMichael Kruse  br label %for.cond
60*35f70200SMichael Kruse
; Loop header: %i.0 is the induction variable (0 from entry, %inc from
; for.inc); the loop runs while %i.0 < %N under a signed comparison.
61*35f70200SMichael Krusefor.cond:                                         ; preds = %for.inc, %entry
62*35f70200SMichael Kruse  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
63*35f70200SMichael Kruse  %cmp = icmp slt i64 %i.0, %N
64*35f70200SMichael Kruse  br i1 %cmp, label %for.body, label %for.end
65*35f70200SMichael Kruse
; Loop body: accumulate A[i] + B[i] + A[i] + B[p + i] and add it into C[i].
66*35f70200SMichael Krusefor.body:                                         ; preds = %for.cond
  ; First pair of loads: A[i] and B[i].
67*35f70200SMichael Kruse  %arrayidx = getelementptr inbounds float, float* %A, i64 %i.0
68*35f70200SMichael Kruse  %tmp = load float, float* %arrayidx, align 4
69*35f70200SMichael Kruse  %arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0
70*35f70200SMichael Kruse  %tmp1 = load float, float* %arrayidx1, align 4
71*35f70200SMichael Kruse  %add = fadd float %tmp, %tmp1
  ; Second load of A[i]; kept as a separate access rather than reusing %tmp.
72*35f70200SMichael Kruse  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %i.0
73*35f70200SMichael Kruse  %tmp2 = load float, float* %arrayidx2, align 4
74*35f70200SMichael Kruse  %add3 = fadd float %add, %tmp2
  ; Second load from B, at the parameter-dependent index p + i.
75*35f70200SMichael Kruse  %padd = add nsw i64 %p, %i.0
76*35f70200SMichael Kruse  %arrayidx4 = getelementptr inbounds float, float* %B, i64 %padd
77*35f70200SMichael Kruse  %tmp3 = load float, float* %arrayidx4, align 4
78*35f70200SMichael Kruse  %add5 = fadd float %add3, %tmp3
  ; Read-modify-write of C[i]: the load and the store share %arrayidx6.
79*35f70200SMichael Kruse  %arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0
80*35f70200SMichael Kruse  %tmp4 = load float, float* %arrayidx6, align 4
81*35f70200SMichael Kruse  %add7 = fadd float %tmp4, %add5
82*35f70200SMichael Kruse  store float %add7, float* %arrayidx6, align 4
83*35f70200SMichael Kruse  br label %for.inc
84*35f70200SMichael Kruse
; Loop latch: advance the induction variable by one and re-test in for.cond.
85*35f70200SMichael Krusefor.inc:                                          ; preds = %for.body
86*35f70200SMichael Kruse  %inc = add nuw nsw i64 %i.0, 1
87*35f70200SMichael Kruse  br label %for.cond
88*35f70200SMichael Kruse
; Loop exit: reached from for.cond once %i.0 < %N no longer holds.
89*35f70200SMichael Krusefor.end:                                          ; preds = %for.cond
90*35f70200SMichael Kruse  ret void
91*35f70200SMichael Kruse}
92