; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s

; The module is run through the load-store-vectorizer twice: once for hawaii
; (checked with the GCN and GFX7 prefixes) and once for gfx900 (checked with
; the GCN and GFX9 prefixes). Checks under the GCN prefix therefore have to
; hold for both targets.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; Array of 16384 i32 elements in address space 3. Every access in both test
; functions below goes through a getelementptr into this array.
@0 = internal addrspace(3) global [16384 x i32] undef

; Checks that there is no crash when there are multiple tails
; for the same head starting a chain.
;
; %tmp3 and %tmp5 address neighbouring elements (%arg + 14 and %arg + 15).
; The element at %tmp5 is stored to three times, so the single store to %tmp3
; is followed by several stores to the adjacent element. The expected output
; is one <2 x i32> store plus the two remaining scalar stores.

; GCN-LABEL: @no_crash(
; GCN: store <2 x i32> zeroinitializer
; GCN: store i32 0
; GCN: store i32 0

define amdgpu_kernel void @no_crash(i32 %arg) {
  %tmp2 = add i32 %arg, 14
  %tmp3 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* @0, i32 0, i32 %tmp2
  %tmp4 = add i32 %arg, 15
  %tmp5 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* @0, i32 0, i32 %tmp4

  store i32 0, i32 addrspace(3)* %tmp3, align 4
  store i32 0, i32 addrspace(3)* %tmp5, align 4
  store i32 0, i32 addrspace(3)* %tmp5, align 4
  store i32 0, i32 addrspace(3)* %tmp5, align 4

  ret void
}

; Check that adjacent memory locations are properly matched and that the
; longest chain is vectorized.
;
; %tmp1 .. %tmp5 address the five consecutive elements %arg .. %arg + 4.
; Loads and stores of the first two elements are interleaved at the top of
; the function; they are followed by loads of elements %arg + 1 .. %arg + 4,
; with the last element loaded three times. The expected vector widths differ
; between the two targets, hence the separate GFX7 and GFX9 check sequences.
;
; The label below ends in '(' like the one for @no_crash, so that it can only
; match this exact function name.

; GCN-LABEL: @interleave_get_longest(

; GFX7: load <2 x i32>
; GFX7: load i32
; GFX7: store <2 x i32> zeroinitializer
; GFX7: load i32
; GFX7: load <2 x i32>
; GFX7: load i32
; GFX7: load i32

; GFX9: load <4 x i32>
; GFX9: load i32
; GFX9: store <2 x i32> zeroinitializer
; GFX9: load i32
; GFX9: load i32
; GFX9: load i32

define amdgpu_kernel void @interleave_get_longest(i32 %arg) {
  %a1 = add i32 %arg, 1
  %a2 = add i32 %arg, 2
  %a3 = add i32 %arg, 3
  %a4 = add i32 %arg, 4
  %tmp1 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* @0, i32 0, i32 %arg
  %tmp2 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* @0, i32 0, i32 %a1
  %tmp3 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* @0, i32 0, i32 %a2
  %tmp4 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* @0, i32 0, i32 %a3
  %tmp5 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* @0, i32 0, i32 %a4

  %l1 = load i32, i32 addrspace(3)* %tmp2, align 4
  %l2 = load i32, i32 addrspace(3)* %tmp1, align 4
  store i32 0, i32 addrspace(3)* %tmp2, align 4
  store i32 0, i32 addrspace(3)* %tmp1, align 4
  %l3 = load i32, i32 addrspace(3)* %tmp2, align 4
  %l4 = load i32, i32 addrspace(3)* %tmp3, align 4
  %l5 = load i32, i32 addrspace(3)* %tmp4, align 4
  %l6 = load i32, i32 addrspace(3)* %tmp5, align 4
  %l7 = load i32, i32 addrspace(3)* %tmp5, align 4
  %l8 = load i32, i32 addrspace(3)* %tmp5, align 4

  ret void
}