; RUN: opt %loadPolly -polly-tile-sizes=256,16 -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s
; RUN: opt %loadPolly -polly-tile-sizes=256,16 -polly-tiling=false -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s --check-prefix=NOTILING
; RUN: opt %loadPolly -polly-tile-sizes=256,16 -polly-2nd-level-tiling -polly-2nd-level-tile-sizes=16,8 -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s --check-prefix=TWOLEVEL
; RUN: opt %loadPolly -polly-tile-sizes=256,16 -polly-2nd-level-tiling -polly-2nd-level-tile-sizes=16,8 -polly-register-tiling -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s --check-prefix=TWO-PLUS-REGISTER
; RUN: opt %loadPolly -polly-tile-sizes=256,16 -polly-2nd-level-tiling -polly-2nd-level-tile-sizes=16,8 -polly-register-tiling -polly-register-tile-sizes=2,4 -polly-vectorizer=polly -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s --check-prefix=TWO-PLUS-REGISTER-PLUS-VECTORIZATION

; Verifies the AST printed after the isl schedule optimizer has run on the
; rectangular 1024 x 512 loop nest in @rect (defined at the end of this file).
; The five invocations above exercise, in order:
;   1. first-level tiling with 256 x 16 tiles (default check prefix),
;   2. the same tile sizes with tiling switched off (NOTILING prefix),
;   3. an additional second tiling level with 16 x 8 tiles (TWOLEVEL prefix),
;   4. both tiling levels plus register tiling, with no register tile sizes
;      given on the command line; the expected output is a 2 x 2 unrolled
;      body (TWO-PLUS-REGISTER prefix),
;   5. both tiling levels, register tiling with 2 x 4 tiles, and the polly
;      vectorizer (TWO-PLUS-REGISTER-PLUS-VECTORIZATION prefix).

; Expected output, first-level tiling only: 4 x 32 tiles of 256 x 16 points.
; CHECK:       // 1st level tiling - Tiles
; CHECK:       for (int c0 = 0; c0 <= 3; c0 += 1)
; CHECK:         for (int c1 = 0; c1 <= 31; c1 += 1)
; CHECK:           // 1st level tiling - Points
; CHECK:           for (int c2 = 0; c2 <= 255; c2 += 1)
; CHECK:             for (int c3 = 0; c3 <= 15; c3 += 1)
; CHECK:               Stmt_for_body3(256 * c0 + c2, 16 * c1 + c3);

; Expected output with tiling disabled: the loop nest keeps its original
; 1024 x 512 shape.
; NOTILING:    for (int c0 = 0; c0 <= 1023; c0 += 1)
; NOTILING:      for (int c1 = 0; c1 <= 511; c1 += 1)
; NOTILING:        Stmt_for_body3(c0, c1);


; Expected output with two tiling levels: every 256 x 16 tile is split again
; into 16 x 2 tiles of 16 x 8 points.
; TWOLEVEL:    // 1st level tiling - Tiles
; TWOLEVEL:    for (int c0 = 0; c0 <= 3; c0 += 1)
; TWOLEVEL:      for (int c1 = 0; c1 <= 31; c1 += 1)
; TWOLEVEL:        // 1st level tiling - Points
; TWOLEVEL:        // 2nd level tiling - Tiles
; TWOLEVEL:        for (int c2 = 0; c2 <= 15; c2 += 1)
; TWOLEVEL:          for (int c3 = 0; c3 <= 1; c3 += 1)
; TWOLEVEL:            // 2nd level tiling - Points
; TWOLEVEL:            for (int c4 = 0; c4 <= 15; c4 += 1)
; TWOLEVEL:              for (int c5 = 0; c5 <= 7; c5 += 1)
; TWOLEVEL:                Stmt_for_body3(256 * c0 + 16 * c2 + c4, 16 * c1 + 8 * c3 + c5);


; Expected output with two tiling levels plus register tiling: the point
; loops of each 16 x 8 tile become 8 x 4 register tiles whose 2 x 2 body is
; emitted as four explicit statement instances.
; TWO-PLUS-REGISTER:    // 1st level tiling - Tiles
; TWO-PLUS-REGISTER:    for (int c0 = 0; c0 <= 3; c0 += 1)
; TWO-PLUS-REGISTER:      for (int c1 = 0; c1 <= 31; c1 += 1)
; TWO-PLUS-REGISTER:        // 1st level tiling - Points
; TWO-PLUS-REGISTER:        // 2nd level tiling - Tiles
; TWO-PLUS-REGISTER:        for (int c2 = 0; c2 <= 15; c2 += 1)
; TWO-PLUS-REGISTER:          for (int c3 = 0; c3 <= 1; c3 += 1)
; TWO-PLUS-REGISTER:            // 2nd level tiling - Points
; TWO-PLUS-REGISTER:            // Register tiling - Tiles
; TWO-PLUS-REGISTER:            for (int c4 = 0; c4 <= 7; c4 += 1)
; TWO-PLUS-REGISTER:              for (int c5 = 0; c5 <= 3; c5 += 1)
; TWO-PLUS-REGISTER:                // Register tiling - Points
; TWO-PLUS-REGISTER:                {
; TWO-PLUS-REGISTER:                  Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 2 * c5);
; TWO-PLUS-REGISTER:                  Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 2 * c5 + 1);
; TWO-PLUS-REGISTER:                  Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5);
; TWO-PLUS-REGISTER:                  Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5 + 1);
; TWO-PLUS-REGISTER:                }

; Expected output with 2 x 4 register tiles and the polly vectorizer: the
; outermost loop is marked parallel, and the width-4 inner dimension of each
; register tile is emitted as a SIMD-marked loop, once per row of the tile.
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:    #pragma known-parallel
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:    for (int c0 = 0; c0 <= 3; c0 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:      for (int c1 = 0; c1 <= 31; c1 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:        for (int c2 = 0; c2 <= 15; c2 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:          for (int c3 = 0; c3 <= 1; c3 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:            for (int c4 = 0; c4 <= 7; c4 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:              for (int c5 = 0; c5 <= 1; c5 += 1) {
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:                // SIMD
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:                for (int c8 = 0; c8 <= 3; c8 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:                  Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 4 * c5 + c8);
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:                // SIMD
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:                for (int c8 = 0; c8 <= 3; c8 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:                  Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 4 * c5 + c8);
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:              }

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"

; Test input: a two-deep loop nest that stores (i * j) srem 42 into A[i][j]
; for i = 0 .. 1023 (outer loop) and j = 0 .. 511 (inner loop).
; Function Attrs: nounwind
define void @rect([512 x i32]* %A) {
entry:
  br label %entry.split

entry.split:                                      ; preds = %entry
  br label %for.body3.lr.ph

; Outer loop header: %i.0 is the row index i.
for.body3.lr.ph:                                  ; preds = %for.inc5, %entry.split
  %i.0 = phi i32 [ 0, %entry.split ], [ %inc6, %for.inc5 ]
  br label %for.body3

; Inner loop body: %j.0 is the column index j; this block is the statement
; that appears as Stmt_for_body3 in the expected output above.
for.body3:                                        ; preds = %for.body3.lr.ph, %for.body3
  %j.0 = phi i32 [ 0, %for.body3.lr.ph ], [ %inc, %for.body3 ]
  %mul = mul nsw i32 %j.0, %i.0
  %rem = srem i32 %mul, 42
  %arrayidx4 = getelementptr inbounds [512 x i32], [512 x i32]* %A, i32 %i.0, i32 %j.0
  store i32 %rem, i32* %arrayidx4, align 4
  %inc = add nsw i32 %j.0, 1
  %cmp2 = icmp slt i32 %inc, 512
  br i1 %cmp2, label %for.body3, label %for.inc5

; Outer loop latch: advance i and repeat while i + 1 < 1024.
for.inc5:                                         ; preds = %for.body3
  %inc6 = add nsw i32 %i.0, 1
  %cmp = icmp slt i32 %inc6, 1024
  br i1 %cmp, label %for.body3.lr.ph, label %for.end7

for.end7:                                         ; preds = %for.inc5
  ret void
}