; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
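; Check that chains of adjacent scalar loads feeding zero-extensions are
; combined into a single wide vector load plus a vector zext (SSE2 keeps the
; scalar form only for the 2 x i8 case, per the checks below).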

;
; vXi8
;

define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; SLM-NEXT: ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; AVX-NEXT: ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i32(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; SSE2-NEXT: ret <4 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_4i8_to_4i32(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; SLM-NEXT: ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i32(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; AVX-NEXT: ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = zext i8 %i0 to i32
  %x1 = zext i8 %i1 to i32
  %x2 = zext i8 %i2 to i32
  %x3 = zext i8 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i64(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; SSE2-NEXT: ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i8_to_4i64(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; SLM-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i64(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; AVX-NEXT: ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %x2 = zext i8 %i2 to i64
  %x3 = zext i8 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
; SSE2-LABEL: @loadext_8i8_to_8i16(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; SSE2-NEXT: ret <8 x i16> [[TMP3]]
;
; SLM-LABEL: @loadext_8i8_to_8i16(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; SLM-NEXT: ret <8 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i16(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; AVX-NEXT: ret <8 x i16> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = zext i8 %i0 to i16
  %x1 = zext i8 %i1 to i16
  %x2 = zext i8 %i2 to i16
  %x3 = zext i8 %i3 to i16
  %x4 = zext i8 %i4 to i16
  %x5 = zext i8 %i5 to i16
  %x6 = zext i8 %i6 to i16
  %x7 = zext i8 %i7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7
  ret <8 x i16> %v7
}

define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
; SSE2-LABEL: @loadext_8i8_to_8i32(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; SSE2-NEXT: ret <8 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_8i8_to_8i32(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; SLM-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i32(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; AVX-NEXT: ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = zext i8 %i0 to i32
  %x1 = zext i8 %i1 to i32
  %x2 = zext i8 %i2 to i32
  %x3 = zext i8 %i3 to i32
  %x4 = zext i8 %i4 to i32
  %x5 = zext i8 %i5 to i32
  %x6 = zext i8 %i6 to i32
  %x7 = zext i8 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
; SSE2-LABEL: @loadext_16i8_to_16i16(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; SSE2-NEXT: ret <16 x i16> [[TMP3]]
;
; SLM-LABEL: @loadext_16i8_to_16i16(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; SLM-NEXT: ret <16 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_16i8_to_16i16(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; AVX-NEXT: ret <16 x i16> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %p8 = getelementptr inbounds i8, i8* %p0, i64 8
  %p9 = getelementptr inbounds i8, i8* %p0, i64 9
  %p10 = getelementptr inbounds i8, i8* %p0, i64 10
  %p11 = getelementptr inbounds i8, i8* %p0, i64 11
  %p12 = getelementptr inbounds i8, i8* %p0, i64 12
  %p13 = getelementptr inbounds i8, i8* %p0, i64 13
  %p14 = getelementptr inbounds i8, i8* %p0, i64 14
  %p15 = getelementptr inbounds i8, i8* %p0, i64 15
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %i8 = load i8, i8* %p8, align 1
  %i9 = load i8, i8* %p9, align 1
  %i10 = load i8, i8* %p10, align 1
  %i11 = load i8, i8* %p11, align 1
  %i12 = load i8, i8* %p12, align 1
  %i13 = load i8, i8* %p13, align 1
  %i14 = load i8, i8* %p14, align 1
  %i15 = load i8, i8* %p15, align 1
  %x0 = zext i8 %i0 to i16
  %x1 = zext i8 %i1 to i16
  %x2 = zext i8 %i2 to i16
  %x3 = zext i8 %i3 to i16
  %x4 = zext i8 %i4 to i16
  %x5 = zext i8 %i5 to i16
  %x6 = zext i8 %i6 to i16
  %x7 = zext i8 %i7 to i16
  %x8 = zext i8 %i8 to i16
  %x9 = zext i8 %i9 to i16
  %x10 = zext i8 %i10 to i16
  %x11 = zext i8 %i11 to i16
  %x12 = zext i8 %i12 to i16
  %x13 = zext i8 %i13 to i16
  %x14 = zext i8 %i14 to i16
  %x15 = zext i8 %i15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %x7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %x8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %x9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %x10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %x11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %x12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %x13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %x14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %x15, i32 15
  ret <16 x i16> %v15
}

;
; vXi16
;

define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
; SSE2-LABEL: @loadext_2i16_to_2i64(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_2i16_to_2i64(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; SLM-NEXT: ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i16_to_2i64(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT: ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %x0 = zext i16 %i0 to i64
  %x1 = zext i16 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i32(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; SSE2-NEXT: ret <4 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_4i16_to_4i32(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; SLM-NEXT: ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i32(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; AVX-NEXT: ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = zext i16 %i0 to i32
  %x1 = zext i16 %i1 to i32
  %x2 = zext i16 %i2 to i32
  %x3 = zext i16 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i64(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; SSE2-NEXT: ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i16_to_4i64(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; SLM-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; AVX-NEXT: ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = zext i16 %i0 to i64
  %x1 = zext i16 %i1 to i64
  %x2 = zext i16 %i2 to i64
  %x3 = zext i16 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
; SSE2-LABEL: @loadext_8i16_to_8i32(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; SSE2-NEXT: ret <8 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_8i16_to_8i32(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; SLM-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i16_to_8i32(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; AVX-NEXT: ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %p4 = getelementptr inbounds i16, i16* %p0, i64 4
  %p5 = getelementptr inbounds i16, i16* %p0, i64 5
  %p6 = getelementptr inbounds i16, i16* %p0, i64 6
  %p7 = getelementptr inbounds i16, i16* %p0, i64 7
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %i4 = load i16, i16* %p4, align 1
  %i5 = load i16, i16* %p5, align 1
  %i6 = load i16, i16* %p6, align 1
  %i7 = load i16, i16* %p7, align 1
  %x0 = zext i16 %i0 to i32
  %x1 = zext i16 %i1 to i32
  %x2 = zext i16 %i2 to i32
  %x3 = zext i16 %i3 to i32
  %x4 = zext i16 %i4 to i32
  %x5 = zext i16 %i5 to i32
  %x6 = zext i16 %i6 to i32
  %x7 = zext i16 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

;
; vXi32
;

define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
; SSE2-LABEL: @loadext_2i32_to_2i64(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_2i32_to_2i64(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; SLM-NEXT: ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i32_to_2i64(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; AVX-NEXT: ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
  %i0 = load i32, i32* %p0, align 1
  %i1 = load i32, i32* %p1, align 1
  %x0 = zext i32 %i0 to i64
  %x1 = zext i32 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
; SSE2-LABEL: @loadext_4i32_to_4i64(
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; SSE2-NEXT: ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i32_to_4i64(
; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; SLM-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i32_to_4i64(
; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; AVX-NEXT: ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
  %p2 = getelementptr inbounds i32, i32* %p0, i64 2
  %p3 = getelementptr inbounds i32, i32* %p0, i64 3
  %i0 = load i32, i32* %p0, align 1
  %i1 = load i32, i32* %p1, align 1
  %i2 = load i32, i32* %p2, align 1
  %i3 = load i32, i32* %p3, align 1
  %x0 = zext i32 %i0 to i64
  %x1 = zext i32 %i1 to i64
  %x2 = zext i32 %i2 to i64
  %x3 = zext i32 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}