1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -mtriple=hexagon -S -hexagon-vc -instcombine < %s | FileCheck %s
3
4; Check that Hexagon Vector Combine propagates (TBAA) metadata to the
5; generated output. (Use instcombine to clean the output up a bit.)
6
7target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
8target triple = "hexagon"
9
10; Two unaligned loads, both with the same TBAA tag.
11;
12define <64 x i16> @f0(i16* %a0, i32 %a1) #0 {
13; CHECK-LABEL: @f0(
14; CHECK-NEXT:  b0:
15; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
16; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
17; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
18; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
19; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
20; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
21; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i32 [[TMP1]] to <32 x i32>*
22; CHECK-NEXT:    [[TMP5:%.*]] = load <32 x i32>, <32 x i32>* [[TMP4]], align 128, !tbaa [[TBAA0:![0-9]+]]
23; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
24; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <64 x i16>* [[TMP6]] to <128 x i8>*
25; CHECK-NEXT:    [[TMP8:%.*]] = load <128 x i8>, <128 x i8>* [[TMP7]], align 128, !tbaa [[TBAA0]]
26; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
27; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16>* [[TMP9]] to <32 x i32>*
28; CHECK-NEXT:    [[TMP11:%.*]] = load <32 x i32>, <32 x i32>* [[TMP10]], align 128, !tbaa [[TBAA0]]
29; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
30; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP12]], <32 x i32> [[TMP5]], i32 [[TMP3]])
31; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
32; CHECK-NEXT:    [[TMP15:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP11]], <32 x i32> [[TMP14]], i32 [[TMP3]])
33; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <32 x i32> [[TMP13]] to <64 x i16>
34; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP15]] to <64 x i16>
35; CHECK-NEXT:    [[V8:%.*]] = add <64 x i16> [[TMP16]], [[TMP17]]
36; CHECK-NEXT:    ret <64 x i16> [[V8]]
37;
38b0:
39  %v0 = add i32 %a1, 64
40  %v1 = getelementptr i16, i16* %a0, i32 %v0
41  %v2 = bitcast i16* %v1 to <64 x i16>*
42  %v3 = load <64 x i16>, <64 x i16>* %v2, align 2, !tbaa !0
43  %v4 = add i32 %a1, 128
44  %v5 = getelementptr i16, i16* %a0, i32 %v4
45  %v6 = bitcast i16* %v5 to <64 x i16>*
46  %v7 = load <64 x i16>, <64 x i16>* %v6, align 2, !tbaa !0
47  %v8 = add <64 x i16> %v3, %v7
48  ret <64 x i16> %v8
49}
50
51; Two unaligned loads, only one with a TBAA tag.
52;
53define <64 x i16> @f1(i16* %a0, i32 %a1) #0 {
54; CHECK-LABEL: @f1(
55; CHECK-NEXT:  b0:
56; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
57; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
58; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
59; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
60; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
61; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
62; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i32 [[TMP1]] to <32 x i32>*
63; CHECK-NEXT:    [[TMP5:%.*]] = load <32 x i32>, <32 x i32>* [[TMP4]], align 128, !tbaa [[TBAA0]]
64; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
65; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <64 x i16>* [[TMP6]] to <128 x i8>*
66; CHECK-NEXT:    [[TMP8:%.*]] = load <128 x i8>, <128 x i8>* [[TMP7]], align 128
67; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
68; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16>* [[TMP9]] to <32 x i32>*
69; CHECK-NEXT:    [[TMP11:%.*]] = load <32 x i32>, <32 x i32>* [[TMP10]], align 128
70; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
71; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP12]], <32 x i32> [[TMP5]], i32 [[TMP3]])
72; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
73; CHECK-NEXT:    [[TMP15:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP11]], <32 x i32> [[TMP14]], i32 [[TMP3]])
74; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <32 x i32> [[TMP13]] to <64 x i16>
75; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP15]] to <64 x i16>
76; CHECK-NEXT:    [[V8:%.*]] = add <64 x i16> [[TMP16]], [[TMP17]]
77; CHECK-NEXT:    ret <64 x i16> [[V8]]
78;
79b0:
80  %v0 = add i32 %a1, 64
81  %v1 = getelementptr i16, i16* %a0, i32 %v0
82  %v2 = bitcast i16* %v1 to <64 x i16>*
83  %v3 = load <64 x i16>, <64 x i16>* %v2, align 2, !tbaa !0
84  %v4 = add i32 %a1, 128
85  %v5 = getelementptr i16, i16* %a0, i32 %v4
86  %v6 = bitcast i16* %v5 to <64 x i16>*
87  %v7 = load <64 x i16>, <64 x i16>* %v6, align 2
88  %v8 = add <64 x i16> %v3, %v7
89  ret <64 x i16> %v8
90}
91
92; Two unaligned loads, with different TBAA tags.
93;
94define <64 x i16> @f2(i16* %a0, i32 %a1) #0 {
95; CHECK-LABEL: @f2(
96; CHECK-NEXT:  b0:
97; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
98; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
99; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
100; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
101; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
102; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
103; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i32 [[TMP1]] to <32 x i32>*
104; CHECK-NEXT:    [[TMP5:%.*]] = load <32 x i32>, <32 x i32>* [[TMP4]], align 128, !tbaa [[TBAA0]]
105; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
106; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <64 x i16>* [[TMP6]] to <128 x i8>*
107; CHECK-NEXT:    [[TMP8:%.*]] = load <128 x i8>, <128 x i8>* [[TMP7]], align 128
108; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
109; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16>* [[TMP9]] to <32 x i32>*
110; CHECK-NEXT:    [[TMP11:%.*]] = load <32 x i32>, <32 x i32>* [[TMP10]], align 128, !tbaa [[TBAA3:![0-9]+]]
111; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
112; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP12]], <32 x i32> [[TMP5]], i32 [[TMP3]])
113; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
114; CHECK-NEXT:    [[TMP15:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP11]], <32 x i32> [[TMP14]], i32 [[TMP3]])
115; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <32 x i32> [[TMP13]] to <64 x i16>
116; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP15]] to <64 x i16>
117; CHECK-NEXT:    [[V8:%.*]] = add <64 x i16> [[TMP16]], [[TMP17]]
118; CHECK-NEXT:    ret <64 x i16> [[V8]]
119;
120b0:
121  %v0 = add i32 %a1, 64
122  %v1 = getelementptr i16, i16* %a0, i32 %v0
123  %v2 = bitcast i16* %v1 to <64 x i16>*
124  %v3 = load <64 x i16>, <64 x i16>* %v2, align 2, !tbaa !0
125  %v4 = add i32 %a1, 128
126  %v5 = getelementptr i16, i16* %a0, i32 %v4
127  %v6 = bitcast i16* %v5 to <64 x i16>*
128  %v7 = load <64 x i16>, <64 x i16>* %v6, align 2, !tbaa !3
129  %v8 = add <64 x i16> %v3, %v7
130  ret <64 x i16> %v8
131}
132
133; Two unaligned stores, both with the same TBAA tag.
134;
135define void @f3(i16* %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
136; CHECK-LABEL: @f3(
137; CHECK-NEXT:  b0:
138; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
139; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
140; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
141; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
142; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
143; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
144; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
145; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP4]], <32 x i32> undef, i32 [[TMP3]])
146; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <32 x i32> [[TMP5]] to <128 x i8>
147; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> zeroinitializer, i32 [[TMP3]])
148; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <32 x i32> [[TMP7]] to <128 x i8>
149; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32>
150; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32>
151; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP9]], <32 x i32> [[TMP10]], i32 [[TMP3]])
152; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <32 x i32> [[TMP11]] to <128 x i8>
153; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
154; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <32 x i32> [[TMP13]] to <128 x i8>
155; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32>
156; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[TMP15]], i32 [[TMP3]])
157; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP16]] to <128 x i8>
158; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
159; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <32 x i32> [[TMP18]] to <128 x i8>
160; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i32 [[TMP1]] to <128 x i8>*
161; CHECK-NEXT:    [[TMP21:%.*]] = trunc <128 x i8> [[TMP8]] to <128 x i1>
162; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP6]], <128 x i8>* [[TMP20]], i32 128, <128 x i1> [[TMP21]]), !tbaa [[TBAA5:![0-9]+]]
163; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
164; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <64 x i16>* [[TMP22]] to <128 x i8>*
165; CHECK-NEXT:    [[TMP24:%.*]] = trunc <128 x i8> [[TMP14]] to <128 x i1>
166; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP12]], <128 x i8>* [[TMP23]], i32 128, <128 x i1> [[TMP24]]), !tbaa [[TBAA5]]
167; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
168; CHECK-NEXT:    [[TMP26:%.*]] = bitcast <64 x i16>* [[TMP25]] to <128 x i8>*
169; CHECK-NEXT:    [[TMP27:%.*]] = trunc <128 x i8> [[TMP19]] to <128 x i1>
170; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP17]], <128 x i8>* [[TMP26]], i32 128, <128 x i1> [[TMP27]]), !tbaa [[TBAA5]]
171; CHECK-NEXT:    ret void
172;
173b0:
174  %v0 = add i32 %a1, 64
175  %v1 = getelementptr i16, i16* %a0, i32 %v0
176  %v2 = bitcast i16* %v1 to <64 x i16>*
177  store <64 x i16> %a2, <64 x i16>* %v2, align 2, !tbaa !5
178  %v3 = add i32 %a1, 128
179  %v4 = getelementptr i16, i16* %a0, i32 %v3
180  %v5 = bitcast i16* %v4 to <64 x i16>*
181  store <64 x i16> %a3, <64 x i16>* %v5, align 2, !tbaa !5
182  ret void
183}
184
185; Two unaligned stores, only one with a TBAA tag.
186;
187define void @f4(i16* %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
188; CHECK-LABEL: @f4(
189; CHECK-NEXT:  b0:
190; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
191; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
192; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
193; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
194; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
195; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
196; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
197; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP4]], <32 x i32> undef, i32 [[TMP3]])
198; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <32 x i32> [[TMP5]] to <128 x i8>
199; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> zeroinitializer, i32 [[TMP3]])
200; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <32 x i32> [[TMP7]] to <128 x i8>
201; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32>
202; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32>
203; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP9]], <32 x i32> [[TMP10]], i32 [[TMP3]])
204; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <32 x i32> [[TMP11]] to <128 x i8>
205; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
206; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <32 x i32> [[TMP13]] to <128 x i8>
207; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32>
208; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[TMP15]], i32 [[TMP3]])
209; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP16]] to <128 x i8>
210; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
211; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <32 x i32> [[TMP18]] to <128 x i8>
212; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i32 [[TMP1]] to <128 x i8>*
213; CHECK-NEXT:    [[TMP21:%.*]] = trunc <128 x i8> [[TMP8]] to <128 x i1>
214; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP6]], <128 x i8>* [[TMP20]], i32 128, <128 x i1> [[TMP21]])
215; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
216; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <64 x i16>* [[TMP22]] to <128 x i8>*
217; CHECK-NEXT:    [[TMP24:%.*]] = trunc <128 x i8> [[TMP14]] to <128 x i1>
218; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP12]], <128 x i8>* [[TMP23]], i32 128, <128 x i1> [[TMP24]])
219; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
220; CHECK-NEXT:    [[TMP26:%.*]] = bitcast <64 x i16>* [[TMP25]] to <128 x i8>*
221; CHECK-NEXT:    [[TMP27:%.*]] = trunc <128 x i8> [[TMP19]] to <128 x i1>
222; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP17]], <128 x i8>* [[TMP26]], i32 128, <128 x i1> [[TMP27]]), !tbaa [[TBAA5]]
223; CHECK-NEXT:    ret void
224;
225b0:
226  %v0 = add i32 %a1, 64
227  %v1 = getelementptr i16, i16* %a0, i32 %v0
228  %v2 = bitcast i16* %v1 to <64 x i16>*
229  store <64 x i16> %a2, <64 x i16>* %v2, align 2
230  %v3 = add i32 %a1, 128
231  %v4 = getelementptr i16, i16* %a0, i32 %v3
232  %v5 = bitcast i16* %v4 to <64 x i16>*
233  store <64 x i16> %a3, <64 x i16>* %v5, align 2, !tbaa !5
234  ret void
235}
236
237; Two unaligned store, with different TBAA tags.
238;
239define void @f5(i16* %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
240; CHECK-LABEL: @f5(
241; CHECK-NEXT:  b0:
242; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
243; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
244; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
245; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
246; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
247; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
248; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
249; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP4]], <32 x i32> undef, i32 [[TMP3]])
250; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <32 x i32> [[TMP5]] to <128 x i8>
251; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> zeroinitializer, i32 [[TMP3]])
252; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <32 x i32> [[TMP7]] to <128 x i8>
253; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32>
254; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32>
255; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP9]], <32 x i32> [[TMP10]], i32 [[TMP3]])
256; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <32 x i32> [[TMP11]] to <128 x i8>
257; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
258; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <32 x i32> [[TMP13]] to <128 x i8>
259; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32>
260; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[TMP15]], i32 [[TMP3]])
261; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP16]] to <128 x i8>
262; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
263; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <32 x i32> [[TMP18]] to <128 x i8>
264; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i32 [[TMP1]] to <128 x i8>*
265; CHECK-NEXT:    [[TMP21:%.*]] = trunc <128 x i8> [[TMP8]] to <128 x i1>
266; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP6]], <128 x i8>* [[TMP20]], i32 128, <128 x i1> [[TMP21]]), !tbaa [[TBAA5]]
267; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
268; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <64 x i16>* [[TMP22]] to <128 x i8>*
269; CHECK-NEXT:    [[TMP24:%.*]] = trunc <128 x i8> [[TMP14]] to <128 x i1>
270; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP12]], <128 x i8>* [[TMP23]], i32 128, <128 x i1> [[TMP24]])
271; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
272; CHECK-NEXT:    [[TMP26:%.*]] = bitcast <64 x i16>* [[TMP25]] to <128 x i8>*
273; CHECK-NEXT:    [[TMP27:%.*]] = trunc <128 x i8> [[TMP19]] to <128 x i1>
274; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP17]], <128 x i8>* [[TMP26]], i32 128, <128 x i1> [[TMP27]]), !tbaa [[TBAA7:![0-9]+]]
275; CHECK-NEXT:    ret void
276;
277b0:
278  %v0 = add i32 %a1, 64
279  %v1 = getelementptr i16, i16* %a0, i32 %v0
280  %v2 = bitcast i16* %v1 to <64 x i16>*
281  store <64 x i16> %a2, <64 x i16>* %v2, align 2, !tbaa !5
282  %v3 = add i32 %a1, 128
283  %v4 = getelementptr i16, i16* %a0, i32 %v3
284  %v5 = bitcast i16* %v4 to <64 x i16>*
285  store <64 x i16> %a3, <64 x i16>* %v5, align 2, !tbaa !7
286  ret void
287}
288
289attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" }
290
291!0 = !{!1, !1, i64 0}
292!1 = !{!"load type 1", !2}
293!2 = !{!"Simple C/C++ TBAA"}
294!3 = !{!4, !4, i64 0}
295!4 = !{!"load type 2", !2}
296!5 = !{!6, !6, i64 0}
297!6 = !{!"store type 1", !2}
298!7 = !{!8, !8, i64 0}
299!8 = !{!"store type 2", !2}
300