; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=hexagon -S -hexagon-vc -instcombine < %s | FileCheck %s

; Check that Hexagon Vector Combine propagates (TBAA) metadata to the
; generated output. (Use instcombine to clean the output up a bit.)
;
; Every function in this file performs two adjacent, under-aligned (align 2)
; <64 x i16> accesses. The expected output replaces them with three
; 128-byte-aligned accesses, and the assertions pin which of those three
; accesses carries a !tbaa tag.

target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"

; Two unaligned loads, both with the same TBAA tag.
; All three aligned loads in the expected output carry that tag.
;
define <64 x i16> @f0(i16* %a0, i32 %a1) #0 {
; CHECK-LABEL: @f0(
; CHECK-NEXT:  b0:
; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i32 [[TMP1]] to <32 x i32>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <32 x i32>, <32 x i32>* [[TMP4]], align 128, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <64 x i16>* [[TMP6]] to <128 x i8>*
; CHECK-NEXT:    [[TMP8:%.*]] = load <128 x i8>, <128 x i8>* [[TMP7]], align 128, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16>* [[TMP9]] to <32 x i32>*
; CHECK-NEXT:    [[TMP11:%.*]] = load <32 x i32>, <32 x i32>* [[TMP10]], align 128, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP12]], <32 x i32> [[TMP5]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP11]], <32 x i32> [[TMP14]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <32 x i32> [[TMP13]] to <64 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP15]] to <64 x i16>
; CHECK-NEXT:    [[V8:%.*]] = add <64 x i16> [[TMP16]], [[TMP17]]
; CHECK-NEXT:    ret <64 x i16> [[V8]]
;
b0:
  %v0 = add i32 %a1, 64
  %v1 = getelementptr i16, i16* %a0, i32 %v0
  %v2 = bitcast i16* %v1 to <64 x i16>*
  %v3 = load <64 x i16>, <64 x i16>* %v2, align 2, !tbaa !0
  %v4 = add i32 %a1, 128
  %v5 = getelementptr i16, i16* %a0, i32 %v4
  %v6 = bitcast i16* %v5 to <64 x i16>*
  %v7 = load <64 x i16>, <64 x i16>* %v6, align 2, !tbaa !0
  %v8 = add <64 x i16> %v3, %v7
  ret <64 x i16> %v8
}

; Two unaligned loads, only one with a TBAA tag.
; Only the first aligned load in the expected output keeps the tag; the other
; two aligned loads are expected to be untagged.
;
define <64 x i16> @f1(i16* %a0, i32 %a1) #0 {
; CHECK-LABEL: @f1(
; CHECK-NEXT:  b0:
; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i32 [[TMP1]] to <32 x i32>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <32 x i32>, <32 x i32>* [[TMP4]], align 128, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <64 x i16>* [[TMP6]] to <128 x i8>*
; CHECK-NEXT:    [[TMP8:%.*]] = load <128 x i8>, <128 x i8>* [[TMP7]], align 128
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16>* [[TMP9]] to <32 x i32>*
; CHECK-NEXT:    [[TMP11:%.*]] = load <32 x i32>, <32 x i32>* [[TMP10]], align 128
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP12]], <32 x i32> [[TMP5]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP11]], <32 x i32> [[TMP14]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <32 x i32> [[TMP13]] to <64 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP15]] to <64 x i16>
; CHECK-NEXT:    [[V8:%.*]] = add <64 x i16> [[TMP16]], [[TMP17]]
; CHECK-NEXT:    ret <64 x i16> [[V8]]
;
b0:
  %v0 = add i32 %a1, 64
  %v1 = getelementptr i16, i16* %a0, i32 %v0
  %v2 = bitcast i16* %v1 to <64 x i16>*
  %v3 = load <64 x i16>, <64 x i16>* %v2, align 2, !tbaa !0
  %v4 = add i32 %a1, 128
  %v5 = getelementptr i16, i16* %a0, i32 %v4
  %v6 = bitcast i16* %v5 to <64 x i16>*
  %v7 = load <64 x i16>, <64 x i16>* %v6, align 2
  %v8 = add <64 x i16> %v3, %v7
  ret <64 x i16> %v8
}

; Two unaligned loads, with different TBAA tags.
; The expected output tags the first aligned load with the first load's tag
; and the third aligned load with the second load's tag; the middle aligned
; load is expected to be untagged.
;
define <64 x i16> @f2(i16* %a0, i32 %a1) #0 {
; CHECK-LABEL: @f2(
; CHECK-NEXT:  b0:
; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i32 [[TMP1]] to <32 x i32>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <32 x i32>, <32 x i32>* [[TMP4]], align 128, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <64 x i16>* [[TMP6]] to <128 x i8>*
; CHECK-NEXT:    [[TMP8:%.*]] = load <128 x i8>, <128 x i8>* [[TMP7]], align 128
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16>* [[TMP9]] to <32 x i32>*
; CHECK-NEXT:    [[TMP11:%.*]] = load <32 x i32>, <32 x i32>* [[TMP10]], align 128, !tbaa [[TBAA3:![0-9]+]]
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP12]], <32 x i32> [[TMP5]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <128 x i8> [[TMP8]] to <32 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP11]], <32 x i32> [[TMP14]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <32 x i32> [[TMP13]] to <64 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP15]] to <64 x i16>
; CHECK-NEXT:    [[V8:%.*]] = add <64 x i16> [[TMP16]], [[TMP17]]
; CHECK-NEXT:    ret <64 x i16> [[V8]]
;
b0:
  %v0 = add i32 %a1, 64
  %v1 = getelementptr i16, i16* %a0, i32 %v0
  %v2 = bitcast i16* %v1 to <64 x i16>*
  %v3 = load <64 x i16>, <64 x i16>* %v2, align 2, !tbaa !0
  %v4 = add i32 %a1, 128
  %v5 = getelementptr i16, i16* %a0, i32 %v4
  %v6 = bitcast i16* %v5 to <64 x i16>*
  %v7 = load <64 x i16>, <64 x i16>* %v6, align 2, !tbaa !3
  %v8 = add <64 x i16> %v3, %v7
  ret <64 x i16> %v8
}

; Two unaligned stores, both with the same TBAA tag.
; All three aligned masked stores in the expected output carry that tag.
;
define void @f3(i16* %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
; CHECK-LABEL: @f3(
; CHECK-NEXT:  b0:
; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP4]], <32 x i32> undef, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <32 x i32> [[TMP5]] to <128 x i8>
; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> zeroinitializer, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <32 x i32> [[TMP7]] to <128 x i8>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP9]], <32 x i32> [[TMP10]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <32 x i32> [[TMP11]] to <128 x i8>
; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <32 x i32> [[TMP13]] to <128 x i8>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[TMP15]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP16]] to <128 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <32 x i32> [[TMP18]] to <128 x i8>
; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i32 [[TMP1]] to <128 x i8>*
; CHECK-NEXT:    [[TMP21:%.*]] = trunc <128 x i8> [[TMP8]] to <128 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP6]], <128 x i8>* [[TMP20]], i32 128, <128 x i1> [[TMP21]]), !tbaa [[TBAA5:![0-9]+]]
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <64 x i16>* [[TMP22]] to <128 x i8>*
; CHECK-NEXT:    [[TMP24:%.*]] = trunc <128 x i8> [[TMP14]] to <128 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP12]], <128 x i8>* [[TMP23]], i32 128, <128 x i1> [[TMP24]]), !tbaa [[TBAA5]]
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
; CHECK-NEXT:    [[TMP26:%.*]] = bitcast <64 x i16>* [[TMP25]] to <128 x i8>*
; CHECK-NEXT:    [[TMP27:%.*]] = trunc <128 x i8> [[TMP19]] to <128 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP17]], <128 x i8>* [[TMP26]], i32 128, <128 x i1> [[TMP27]]), !tbaa [[TBAA5]]
; CHECK-NEXT:    ret void
;
b0:
  %v0 = add i32 %a1, 64
  %v1 = getelementptr i16, i16* %a0, i32 %v0
  %v2 = bitcast i16* %v1 to <64 x i16>*
  store <64 x i16> %a2, <64 x i16>* %v2, align 2, !tbaa !5
  %v3 = add i32 %a1, 128
  %v4 = getelementptr i16, i16* %a0, i32 %v3
  %v5 = bitcast i16* %v4 to <64 x i16>*
  store <64 x i16> %a3, <64 x i16>* %v5, align 2, !tbaa !5
  ret void
}

; Two unaligned stores, only one with a TBAA tag.
; Here the second store is the tagged one: only the last aligned masked store
; in the expected output carries the tag; the first two are untagged.
;
define void @f4(i16* %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
; CHECK-LABEL: @f4(
; CHECK-NEXT:  b0:
; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP4]], <32 x i32> undef, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <32 x i32> [[TMP5]] to <128 x i8>
; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> zeroinitializer, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <32 x i32> [[TMP7]] to <128 x i8>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP9]], <32 x i32> [[TMP10]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <32 x i32> [[TMP11]] to <128 x i8>
; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <32 x i32> [[TMP13]] to <128 x i8>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[TMP15]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP16]] to <128 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <32 x i32> [[TMP18]] to <128 x i8>
; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i32 [[TMP1]] to <128 x i8>*
; CHECK-NEXT:    [[TMP21:%.*]] = trunc <128 x i8> [[TMP8]] to <128 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP6]], <128 x i8>* [[TMP20]], i32 128, <128 x i1> [[TMP21]])
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <64 x i16>* [[TMP22]] to <128 x i8>*
; CHECK-NEXT:    [[TMP24:%.*]] = trunc <128 x i8> [[TMP14]] to <128 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP12]], <128 x i8>* [[TMP23]], i32 128, <128 x i1> [[TMP24]])
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
; CHECK-NEXT:    [[TMP26:%.*]] = bitcast <64 x i16>* [[TMP25]] to <128 x i8>*
; CHECK-NEXT:    [[TMP27:%.*]] = trunc <128 x i8> [[TMP19]] to <128 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP17]], <128 x i8>* [[TMP26]], i32 128, <128 x i1> [[TMP27]]), !tbaa [[TBAA5]]
; CHECK-NEXT:    ret void
;
b0:
  %v0 = add i32 %a1, 64
  %v1 = getelementptr i16, i16* %a0, i32 %v0
  %v2 = bitcast i16* %v1 to <64 x i16>*
  store <64 x i16> %a2, <64 x i16>* %v2, align 2
  %v3 = add i32 %a1, 128
  %v4 = getelementptr i16, i16* %a0, i32 %v3
  %v5 = bitcast i16* %v4 to <64 x i16>*
  store <64 x i16> %a3, <64 x i16>* %v5, align 2, !tbaa !5
  ret void
}

; Two unaligned stores, with different TBAA tags.
; The expected output tags the first aligned masked store with the first
; store's tag and the last one with the second store's tag; the middle masked
; store is expected to be untagged.
;
define void @f5(i16* %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
; CHECK-LABEL: @f5(
; CHECK-NEXT:  b0:
; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A1:%.*]], 64
; CHECK-NEXT:    [[V1:%.*]] = getelementptr i16, i16* [[A0:%.*]], i32 [[V0]]
; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -128
; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to <64 x i16>*
; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i16* [[V1]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP4]], <32 x i32> undef, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <32 x i32> [[TMP5]] to <128 x i8>
; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> zeroinitializer, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <32 x i32> [[TMP7]] to <128 x i8>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[TMP9]], <32 x i32> [[TMP10]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <32 x i32> [[TMP11]] to <128 x i8>
; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <32 x i32> [[TMP13]] to <128 x i8>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[TMP15]], i32 [[TMP3]])
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <32 x i32> [[TMP16]] to <128 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[TMP3]])
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <32 x i32> [[TMP18]] to <128 x i8>
; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i32 [[TMP1]] to <128 x i8>*
; CHECK-NEXT:    [[TMP21:%.*]] = trunc <128 x i8> [[TMP8]] to <128 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP6]], <128 x i8>* [[TMP20]], i32 128, <128 x i1> [[TMP21]]), !tbaa [[TBAA5]]
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <64 x i16>* [[TMP22]] to <128 x i8>*
; CHECK-NEXT:    [[TMP24:%.*]] = trunc <128 x i8> [[TMP14]] to <128 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP12]], <128 x i8>* [[TMP23]], i32 128, <128 x i1> [[TMP24]])
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr <64 x i16>, <64 x i16>* [[TMP2]], i32 2
; CHECK-NEXT:    [[TMP26:%.*]] = bitcast <64 x i16>* [[TMP25]] to <128 x i8>*
; CHECK-NEXT:    [[TMP27:%.*]] = trunc <128 x i8> [[TMP19]] to <128 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[TMP17]], <128 x i8>* [[TMP26]], i32 128, <128 x i1> [[TMP27]]), !tbaa [[TBAA7:![0-9]+]]
; CHECK-NEXT:    ret void
;
b0:
  %v0 = add i32 %a1, 64
  %v1 = getelementptr i16, i16* %a0, i32 %v0
  %v2 = bitcast i16* %v1 to <64 x i16>*
  store <64 x i16> %a2, <64 x i16>* %v2, align 2, !tbaa !5
  %v3 = add i32 %a1, 128
  %v4 = getelementptr i16, i16* %a0, i32 %v3
  %v5 = bitcast i16* %v4 to <64 x i16>*
  store <64 x i16> %a3, <64 x i16>* %v5, align 2, !tbaa !7
  ret void
}

; Shared by every function above: Hexagon v66 with HVX in 128-byte mode.
attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" }

; TBAA metadata. !2 is the root; !1/!4 are the two type nodes used by the load
; tests and !6/!8 the two used by the store tests. !0, !3, !5 and !7 are the
; access tags referenced from the instructions.
!0 = !{!1, !1, i64 0}
!1 = !{!"load type 1", !2}
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!4, !4, i64 0}
!4 = !{!"load type 2", !2}
!5 = !{!6, !6, i64 0}
!6 = !{!"store type 1", !2}
!7 = !{!8, !8, i64 0}
!8 = !{!"store type 2", !2}