; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s  -loop-vectorize -dce -instcombine -S | FileCheck -check-prefix=GFX9 %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s  -loop-vectorize -dce -instcombine -S | FileCheck -check-prefix=VI %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s  -loop-vectorize -dce -instcombine -S | FileCheck -check-prefix=CI %s

; Sum reduction over 256 consecutive half values read from %s, accumulated
; with `fadd fast` starting from 0.0.
;
; What the autogenerated checks below expect:
;   GFX9, VI: the loop is vectorized with <2 x half> and two interleaved
;             accumulators (the vector index advances by 4 per iteration);
;             the partial sums are combined in middle.block with
;             llvm.vector.reduce.fadd.v2f16.
;   CI:       the loop is left in its original scalar form.
;
; The CHECK lines are generated output; regenerate them with
; utils/update_test_checks.py instead of editing them by hand.
define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) {
; GFX9-LABEL: @vectorize_v2f16_loop(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; GFX9:       vector.ph:
; GFX9-NEXT:    br label [[VECTOR_BODY:%.*]]
; GFX9:       vector.body:
; GFX9-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; GFX9-NEXT:    [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; GFX9-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; GFX9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds half, half addrspace(1)* [[S:%.*]], i64 [[INDEX]]
; GFX9-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(1)* [[TMP0]] to <2 x half> addrspace(1)*
; GFX9-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP1]], align 2
; GFX9-NEXT:    [[TMP2:%.*]] = getelementptr inbounds half, half addrspace(1)* [[TMP0]], i64 2
; GFX9-NEXT:    [[TMP3:%.*]] = bitcast half addrspace(1)* [[TMP2]] to <2 x half> addrspace(1)*
; GFX9-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP3]], align 2
; GFX9-NEXT:    [[TMP4]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]]
; GFX9-NEXT:    [[TMP5]] = fadd fast <2 x half> [[VEC_PHI1]], [[WIDE_LOAD2]]
; GFX9-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; GFX9-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; GFX9-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; GFX9:       middle.block:
; GFX9-NEXT:    [[BIN_RDX:%.*]] = fadd fast <2 x half> [[TMP5]], [[TMP4]]
; GFX9-NEXT:    [[TMP7:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH8000, <2 x half> [[BIN_RDX]])
; GFX9-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; GFX9:       scalar.ph:
; GFX9-NEXT:    br label [[FOR_BODY:%.*]]
; GFX9:       for.body:
; GFX9-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; GFX9:       for.end:
; GFX9-NEXT:    [[ADD_LCSSA:%.*]] = phi half [ poison, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; GFX9-NEXT:    ret half [[ADD_LCSSA]]
;
; VI-LABEL: @vectorize_v2f16_loop(
; VI-NEXT:  entry:
; VI-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VI:       vector.ph:
; VI-NEXT:    br label [[VECTOR_BODY:%.*]]
; VI:       vector.body:
; VI-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VI-NEXT:    [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; VI-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; VI-NEXT:    [[TMP0:%.*]] = getelementptr inbounds half, half addrspace(1)* [[S:%.*]], i64 [[INDEX]]
; VI-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(1)* [[TMP0]] to <2 x half> addrspace(1)*
; VI-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP1]], align 2
; VI-NEXT:    [[TMP2:%.*]] = getelementptr inbounds half, half addrspace(1)* [[TMP0]], i64 2
; VI-NEXT:    [[TMP3:%.*]] = bitcast half addrspace(1)* [[TMP2]] to <2 x half> addrspace(1)*
; VI-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP3]], align 2
; VI-NEXT:    [[TMP4]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]]
; VI-NEXT:    [[TMP5]] = fadd fast <2 x half> [[VEC_PHI1]], [[WIDE_LOAD2]]
; VI-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VI-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; VI-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VI:       middle.block:
; VI-NEXT:    [[BIN_RDX:%.*]] = fadd fast <2 x half> [[TMP5]], [[TMP4]]
; VI-NEXT:    [[TMP7:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH8000, <2 x half> [[BIN_RDX]])
; VI-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VI:       scalar.ph:
; VI-NEXT:    br label [[FOR_BODY:%.*]]
; VI:       for.body:
; VI-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; VI:       for.end:
; VI-NEXT:    [[ADD_LCSSA:%.*]] = phi half [ poison, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; VI-NEXT:    ret half [[ADD_LCSSA]]
;
; CI-LABEL: @vectorize_v2f16_loop(
; CI-NEXT:  entry:
; CI-NEXT:    br label [[FOR_BODY:%.*]]
; CI:       for.body:
; CI-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CI-NEXT:    [[Q_04:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CI-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds half, half addrspace(1)* [[S:%.*]], i64 [[INDVARS_IV]]
; CI-NEXT:    [[TMP0:%.*]] = load half, half addrspace(1)* [[ARRAYIDX]], align 2
; CI-NEXT:    [[ADD]] = fadd fast half [[Q_04]], [[TMP0]]
; CI-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CI-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256
; CI-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CI:       for.end:
; CI-NEXT:    ret half [[ADD]]
;
entry:
  br label %for.body

; Scalar input loop: one half element is loaded and added per iteration.
for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Running sum; starts at 0.0 and is carried around the back edge.
  %q.04 = phi half [ 0.0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds half, half addrspace(1)* %s, i64 %indvars.iv
  %0 = load half, half addrspace(1)* %arrayidx, align 2
  %add = fadd fast half %q.04, %0
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count: the loop exits once the index reaches 256.
  %exitcond = icmp eq i64 %indvars.iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ; Single-entry phi carrying the final sum out of the loop.
  %add.lcssa = phi half [ %add, %for.body ]
  ret half %add.lcssa
}
