; RUN: opt %loadPolly -polly-import-jscop \
; RUN:     -polly-codegen -S < %s | FileCheck %s
; RUN: opt %loadPolly -polly-import-jscop \
; RUN:     -polly-codegen -polly-import-jscop-postfix=pow2 \
; RUN:     -S < %s | FileCheck %s -check-prefix=POW2
;
;    void exprModDiv(float *A, float *B, float *C, long N, long p) {
;      for (long i = 0; i < N; i++)
;        C[i] += A[i] + B[i] + A[i] + B[i + p];
;    }
;
;
; This test case changes the access functions such that the resulting index
; expressions are modulo or division operations. We test that the code we
; generate takes advantage of knowledge about unsigned numerators. This is
; useful as LLVM will translate urem and udiv operations with power-of-two
; denominators to fast bitwise and or shift operations.

; A[i % 127]
; CHECK:  %pexp.pdiv_r = urem i64 %polly.indvar, 127
; CHECK:  %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r

; A[floor(i / 127)]
;
; Note: without the floor, we would create a map i -> i/127, which only contains
;       values of i that are divisible by 127. All other values of i would not
;       be mapped to any value. However, to generate correct code we require
;       each value of i to indeed be mapped to a value.
;
; CHECK:  %pexp.p_div_q = udiv i64 %polly.indvar, 127
; CHECK:  %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q

; A[p % 128]
; CHECK:  %polly.access.A11 = getelementptr float, float* %A, i64 0

; A[p / 127]
; CHECK:  %pexp.div = sdiv exact i64 %p, 127
; CHECK:  %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div

; A[i % 128]
; POW2:  %pexp.pdiv_r = urem i64 %polly.indvar, 128
; POW2:  %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r

; A[floor(i / 128)]
; POW2:  %pexp.p_div_q = udiv i64 %polly.indvar, 128
; POW2:  %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q

; A[p % 128]
; POW2:  %polly.access.A11 = getelementptr float, float* %A, i64 0

; A[p / 128]
; POW2:  %pexp.div = sdiv exact i64 %p, 128
; POW2:  %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @exprModDiv(float* %A, float* %B, float* %C, i64 %N, i64 %p) {
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i64 %i.0, %N
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %arrayidx = getelementptr inbounds float, float* %A, i64 %i.0
  %tmp = load float, float* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0
  %tmp1 = load float, float* %arrayidx1, align 4
  %add = fadd float %tmp, %tmp1
  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %i.0
  %tmp2 = load float, float* %arrayidx2, align 4
  %add3 = fadd float %add, %tmp2
  %padd = add nsw i64 %p, %i.0
  %arrayidx4 = getelementptr inbounds float, float* %B, i64 %padd
  %tmp3 = load float, float* %arrayidx4, align 4
  %add5 = fadd float %add3, %tmp3
  %arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0
  %tmp4 = load float, float* %arrayidx6, align 4
  %add7 = fadd float %tmp4, %add5
  store float %add7, float* %arrayidx6, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %inc = add nuw nsw i64 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}